thkim0305 commited on
Commit
ca33d77
·
verified ·
1 Parent(s): 0703b3a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35db35281882d1f226fb057ce1dc1d10268ce76953bee3787606804b39d316ef
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4320beb5e2c354434681ed2d3b2dcdd78d536aedc3fba5f9f1e06943ec2e7c9
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8028b19b5f86ee47adabeb517b0cb8e6f55250d34905080c06d2df87bc90e326
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399ebbb40d73ecda84215e6074344b500ffade76f87bfea8a0d6d62346a108d5
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a73b669622db0d158ac1056f7ed6b435324a3c4b5f24e9a3bde9565770f36e5
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4562188f7b74a13d10b41b89b9eddab396cfb79214a4849a8637db03c488f361
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c21d1d0f631ea5682fb5747b169d1f49dea3a6af343afe5d51396cda5580892
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddd22f5b455184254d90300e38de573ee5573bc8ffd8cb32135fe7f168557e7b
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 4.899675369262695,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.4572,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.364077627658844,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.1723,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.9865665435791016,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6005,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 4.352550029754639,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.0405,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.1260665655136108,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2097,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 4.107581615447998,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.3789,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.47090861201286316,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.381,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.8781032562255859,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1236,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 8.501681327819824,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.1681,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.3251969814300537,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6099,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 3.748138189315796,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.0833,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.729949414730072,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.1654,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.2475837469100952,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4468,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.3473127484321594,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2435,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 4.668135166168213,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.338,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 2.5174388885498047,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2791,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 2.1460764408111572,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4064,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 3.378509998321533,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7993,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.6487655639648438,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1094,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.8206214904785156,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.4364,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 2.229614496231079,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.3985,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.4115321636199951,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1919,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 2.823225498199463,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3599,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.7314802408218384,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9801,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.0195244550704956,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0946,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 2.806351661682129,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3121,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.7020225524902344,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.0508,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.1141129732131958,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1508,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.0665926933288574,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1938,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.09458617866039276,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.5867,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.0511982440948486,
224
+ "learning_rate": 2e-05,
225
+ "loss": 2.7297,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 8.42614459991455,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.3084,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 4.046963691711426,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.8881,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 2.4904868602752686,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3384,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.1527413129806519,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.3475,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.3058439493179321,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5549,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.345022976398468,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.869,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.7008552551269531,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.2608,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 3.1025912761688232,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.6226,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 3.710395336151123,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9768,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.3900620937347412,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.1941,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.23837120831012726,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2005,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 5.000380516052246,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.593,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 2.397953748703003,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.7147,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.20643925666809082,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0407,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 3.6777753829956055,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.9327,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.6937133073806763,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.1758,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.2445141077041626,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.7757,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2126123943067648.0,
351
+ "train_loss": 0.6465311345365858,
352
+ "train_runtime": 271.5469,
353
+ "train_samples_per_second": 1.429,
354
+ "train_steps_per_second": 0.357
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2126123943067648.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8dec18e9ef0ae448deccaea823d6e0a2cca1306d431c07c7038536434f2756
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:004ea8c34b31af667eb1fb2bdd883169fb9ff673b0cc75384faaf1f7c3cb71bb
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5da721e1b6c599c6c777932e40e1686f2e690d65f61fafe38ca32a3ce82f4c
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed02013c2ad5dc7d708c4242ec2ee481920b2c93c7dc9031ed9e9511ea59c47
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdb1c926f6643fd1fd6146b0365d0a68c6ec0ac9c150cc3d0e260b305cb7db58
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e0131f3ad1b687e7f496607cd4e14c1663182a9d79a2923b60a4aa694467d4f
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc25cbb650a8a7622723b96c175ad75da07b0f5f4eceaf959431699826cf152
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b064918db2915e992ca655eb974f163135c2d01f6c4cb93d91da3ddb56a3189
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.0385405458509922,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0186,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.06613821536302567,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0849,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 0.02067434787750244,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0054,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.06695201992988586,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0057,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.15570124983787537,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0129,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.05093451589345932,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0056,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.023744968697428703,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0124,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.00312516069971025,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0054,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.006616171449422836,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0262,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.005298080388456583,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0043,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.027790764346718788,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0025,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.036290887743234634,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0025,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.002953270450234413,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0039,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 2.2551519870758057,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2375,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.18529418110847473,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0103,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.886093258857727,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1782,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.22934368252754211,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0129,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 2.155086040496826,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2115,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.00891521479934454,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0413,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.008472035638988018,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0038,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.37362828850746155,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2332,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.011929775588214397,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.001,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.006870917044579983,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0206,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.016721265390515327,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0028,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.024881532415747643,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0027,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.1209266409277916,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8265,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.05023783817887306,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.1705,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.026383670046925545,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0019,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.0019155156332999468,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.0006,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.8070590496063232,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3128,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 3.526737689971924,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0995,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.8756895065307617,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.2206,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.004317080602049828,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.1284,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.28428155183792114,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.0199,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.006947671063244343,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0336,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.08691083639860153,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0093,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.00405128812417388,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.3603,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.00506645767018199,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0006,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.02798837423324585,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.0018,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.011478321626782417,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0014,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.15240181982517242,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.106,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.08322691917419434,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0052,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 6.921684265136719,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4361,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.00928011815994978,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0099,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.7090333700180054,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.1191,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.10394242405891418,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0202,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.04304055869579315,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0018,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.20713742077350616,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.0132,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2143940014768128.0,
351
+ "train_loss": 0.08341487873460829,
352
+ "train_runtime": 270.9164,
353
+ "train_samples_per_second": 1.432,
354
+ "train_steps_per_second": 0.358
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2143940014768128.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8f7f9d641a604368bf8d0e7f00091e0be9859ac869d4b8054b390fa12177e3f
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2d403dbdef0fd93809aadc60b9f7ae19c09b3cf80504dcaca4e989520caa280
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0479ad680594c42187a2d37e97e45cb9ab21768df8b1d2ad47b75fc9363a3e
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c599dfa725ee23a478848ed8752f9bbe83b3dd481eb78ebf78eb6f1d05c68aac
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52c020b0f544305ea9115132a268be550f348fc8138874cca2695fad374b6040
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac62e7a55b58292ccb5668dfb8b1e461a0c99f652b9a0735b19dc2923299501
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:478bf976c320e8c5c811bb0fede5b8ddae2aaeb213094fef2bad1f8edd4ae8ff
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3fcadb5bde76ead584df8fbc7c7fc89c48ca61b07f74e41f427c531412c6dd
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.07272805273532867,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1917,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.7147791385650635,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.7121,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 3.7243456840515137,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.3252,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.936738133430481,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.1401,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.6566787958145142,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7242,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.212766408920288,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.4594,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.575031042098999,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4458,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.3744853138923645,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1401,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 2.374159812927246,
70
+ "learning_rate": 2e-05,
71
+ "loss": 2.0944,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.35343337059021,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1046,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.7141731977462769,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4615,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.3044542968273163,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.1688,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.698765754699707,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9064,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.7965202927589417,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1694,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.8384367227554321,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.0756,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.3519437909126282,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2709,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 2.9105238914489746,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.6573,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.6958692669868469,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.3584,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.4390473961830139,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2845,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.6055613160133362,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3944,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.7648600935935974,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.3706,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 2.231801748275757,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.2567,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.49553102254867554,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2562,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.6220466494560242,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8419,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.9273183941841125,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3708,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.8283573389053345,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.433,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.1112104654312134,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.366,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.7584994435310364,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.2889,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.2555992603302,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.8371,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.22240255773067474,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3454,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.875527024269104,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6807,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.1137892007827759,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.3103,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.6312543749809265,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.4288,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.4749165177345276,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.1926,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.6268261671066284,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.3698,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.9238345623016357,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.5883,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 2.378034830093384,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.1422,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.7543140053749084,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.4975,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.226060152053833,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.4604,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.058547962456941605,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9361,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.3901329040527344,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.1839,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.6894042491912842,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.5983,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.11256992816925049,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.1,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.3978410959243774,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.7952,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.851767659187317,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7418,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.6821492910385132,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.2547,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.10563112795352936,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.1207,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.140236735343933,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.6027,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5055426831843328.0,
351
+ "train_loss": 0.6651097327163539,
352
+ "train_runtime": 434.3501,
353
+ "train_samples_per_second": 0.893,
354
+ "train_steps_per_second": 0.223
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5055426831843328.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8821c785135aa7681f831baee4c8faa6da0bf34abcf61a74c878a10678fc5a
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18fa74d12e99610bc0ba01fcc22126ffaa1e7889f91ec44904c61c9a84bb0197
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076e147af133bf2506e43a1eaf6d6c2f0188696b68debb8e594ea3dc13963f30
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:405a042f3c8bba329b51f7d518f0dfc03f6e32f11911d86934dd301ce82bb2f6
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c0be93ffc19967965abe2fef04a4f9b6c4b7bf10e0659f09f5704bbbc23cc55
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ff9c1bac57727b36b4f19b0a5528833b27575970e5abb9481eef6061d65761d
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988261b37d63335deb0ab84f3c8d6341bbb6c1433375cf8e6799d997da572465
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4cf5a7e90df055317a883596df00673fbebe9169a545fa5a2123618e7c68207
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 3.5025784969329834,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6125,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6424089670181274,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.3566,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 4.314863681793213,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.3257,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.1254048347473145,
35
+ "learning_rate": 2e-05,
36
+ "loss": 2.5388,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.760192632675171,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.3429,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.004739761352539,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.2,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.0539671182632446,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6582,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 4.009338855743408,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0883,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 4.920736312866211,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.8855,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.5970003604888916,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.0158,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 2.9520134925842285,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.726,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.7787227630615234,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.116,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 3.36810040473938,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2355,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.9433608651161194,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6505,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.1001124382019043,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.7527,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 2.966327428817749,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.2433,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 3.646400213241577,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3473,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.8998469114303589,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7622,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.9765092134475708,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7605,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.9249725341796875,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8117,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.4821889400482178,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.4398,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.9430099725723267,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6923,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.3418391942977905,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4962,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.5497465133666992,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7783,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 4.256335258483887,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.774,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.0099101066589355,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.0536,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.356179118156433,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.9636,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 4.572417259216309,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.398,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 4.678338050842285,
210
+ "learning_rate": 2e-05,
211
+ "loss": 2.1435,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 3.575714588165283,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.9645,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 2.5277392864227295,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6022,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 3.385993003845215,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7407,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 5.360639572143555,
238
+ "learning_rate": 2e-05,
239
+ "loss": 2.6118,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 6.285584926605225,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.1991,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 3.385345697402954,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.0076,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.7984771728515625,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8717,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.8088620901107788,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.9135,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.2181267738342285,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.112,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.389383316040039,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.896,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.2696701288223267,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.5581,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 4.296065330505371,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.6235,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 5.560433387756348,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.7516,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.6488584280014038,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.7037,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.2957154512405396,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5997,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 2.889270305633545,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.9081,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 3.077247381210327,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.8997,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 4.6420979499816895,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.8319,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.454413890838623,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.9373,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2171896458117120.0,
351
+ "train_loss": 1.122876275445997,
352
+ "train_runtime": 270.0103,
353
+ "train_samples_per_second": 1.437,
354
+ "train_steps_per_second": 0.359
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2171896458117120.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23387506e7ecf8f3251f0025829a56baa1bf2f769ac9bfa4fdcc90e01a070768
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acdddfcb37d7b530feeec8001857fec2a659c79f2cf1f4c55c44a9f66e5ae63
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cede0a0fcd2006ffaa32ece4315df4e5adb2865e91e0f13e75df63d528db0cb2
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb53b8d130aba5244f53331294cf05558d5bfa3cc6c8190cc13b230d1d64a46
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c08a07e6cd3316c5ec6b2ccd96a9617354d3e87001a2805ad3d3e3f83f88291
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0592e6c0f8a48ad175749a2b569c9f722087166f0892e342960007825ba90a03
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df776933e0bbc0cdc681ab9592b09d9fe10f92c523f0383b464947c0e593677
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f820f6c817a919f8f9d184c0b292b5664efef2f06579de05ae74f87c093aa64
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.518491506576538,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.8582,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6445577144622803,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.1082,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.276530146598816,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.9062,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.4562397003173828,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2294,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.4720100164413452,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4722,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.8212082386016846,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3804,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.2318272590637207,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4925,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.513741374015808,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9536,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.5804106593132019,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.8147,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.2517503499984741,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4827,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.6721848249435425,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3412,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.1781041622161865,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.872,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.1887950897216797,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5626,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.4811246395111084,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4312,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.3950275182724,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5554,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.8101319670677185,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2638,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.3041068315505981,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.154,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.0701684951782227,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7192,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.6231181621551514,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7025,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.8748000860214233,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.6573,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.7263919711112976,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8162,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.2520265579223633,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.8266,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.34067195653915405,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4451,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.4540058374404907,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.6771,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.0151292085647583,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8007,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.1358588933944702,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.2688,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.9416270852088928,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.431,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.288041591644287,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3753,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.13528066873550415,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1003,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.8311398029327393,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8292,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 2.2910356521606445,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7834,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.40395867824554443,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.5579,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.6555685997009277,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.3677,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.7282531261444092,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8838,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.5124473571777344,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.5091,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.059186339378357,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.228,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.1664392948150635,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.641,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.772824764251709,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.1895,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.9583086371421814,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.5631,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 2.7325258255004883,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.264,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.539401650428772,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.22,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.19405996799469,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6883,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.0464004278182983,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5503,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 2.1325461864471436,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.0865,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 2.034447431564331,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6971,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.3602426052093506,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1031,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.8754698634147644,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.374,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.953580617904663,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.7177,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5852858146619392.0,
351
+ "train_loss": 0.6841668984324661,
352
+ "train_runtime": 435.2797,
353
+ "train_samples_per_second": 0.891,
354
+ "train_steps_per_second": 0.223
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5852858146619392.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:872bf8692b034370c1c4c0f005922a1a8759668f6bd24ab1b7a8b31e15575065
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:136f7c89ceb0d2c2c8c383833ddae35cdb2e3237ee03fcb28b38f411bfd09a50
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b964fe05e5bbbea46bfb3de2d1319ba929f4bbf38e97ba47c990b8674af5305d
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adf2d32495d65af59172b9ad8c17f0e066399ac7f6494c618e3b788278339274
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ffa0b5cd6e5c1d97b79db546d8c18cacf42c828b5cba45f2b950fea5897c56
3
+ size 794706058