thkim0305 commited on
Commit
ae71b7c
·
verified ·
1 Parent(s): e1c3736

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
  10. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
  19. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
  28. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
  37. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
  46. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9391214ff1923b83752a8d3951b5d3401a5ec6d0246781854e0201f97f9f82b3
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ff8dd214a728fe9583f1d2f40c92851b977c13963f43257431bd0afcbbad2c
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bfe57805c7c5080f34d0a9b1d11f82f05e58fdce3169d5d4cab80a6b73d0ae
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616a2c97b60a139f9cc9ddbc270586849cc31afc3bcce85ae5087b5fe4964eb3
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4882a2e3ee217c356fc14775ece3eb957872f6040ed76f9dd5fdf02601715dd
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3cbd61ffa32e5a09cab1b8df46c660a822400be159e93bb759a75e0c8897905
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbf69e6bab55d931c75b4629ca7f94f6d85e67a577f60d56d3ad0356e939e732
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01537df585cceb4f79ff0535872d97637653cd1913821410649382c6045014b
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 6.59199857711792,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5286,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 4.61942720413208,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.97,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.521786212921143,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3505,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.39112570881843567,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.115,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 3.506913900375366,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.669,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 7.119588375091553,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6728,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.159899711608887,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4872,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 5.473998069763184,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.3448,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 9.59962272644043,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.1658,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 5.037639617919922,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.437,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.210973024368286,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2022,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.4891958236694336,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.3925,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.502493381500244,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5882,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.41307923197746277,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6127,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.3616442680358887,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6964,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 5.551742076873779,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2612,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.289650917053223,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3091,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 13.974275588989258,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8659,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.4527721405029297,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7772,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.5927505493164062,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.162,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.6561378836631775,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0605,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 4.918743133544922,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7217,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.39737483859062195,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.1857,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 14.40439224243164,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.4849,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.6556407809257507,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.1272,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 9.867362976074219,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4252,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 2.099479913711548,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0928,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 4.53439474105835,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3139,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 7.238653659820557,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.6104,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 1.539478063583374,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.1581,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.8394383192062378,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.4265,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.7109262943267822,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.1265,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 1.0773893594741821,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.2364,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 3.935823917388916,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.0373,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 7.714654922485352,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.6007,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 8.1398286819458,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8645,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 7.816657066345215,
266
+ "learning_rate": 2e-05,
267
+ "loss": 2.3697,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 7.676031589508057,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.59,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.7220346927642822,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9622,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 3.681994915008545,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.4974,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 7.639248847961426,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.9375,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 9.04971694946289,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.22,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.4018713235855103,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0818,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 4.88040828704834,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.7108,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.9837918281555176,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.3657,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.0587419271469116,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.3112,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 9.619867324829102,
336
+ "learning_rate": 2e-05,
337
+ "loss": 2.6249,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.8182443380355835,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.052,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 5.4298481941223145,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.983,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 1.9551576375961304,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.3735,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2722175404474368.0,
365
+ "train_loss": 0.6631992340087891,
366
+ "train_runtime": 215.6578,
367
+ "train_samples_per_second": 1.855,
368
+ "train_steps_per_second": 0.464
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2722175404474368.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88cec14609ad32c290c19fe99cc2626c1573cbc85f114434a313145999b97e84
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df187ae799f263db8566f5385dc79e9c2623b1d6957e1928f94a77e61051ea6c
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae4134c8cf9bd49bd9e9d46343957346c41793b1be9d466e80cfecfcea6f24a
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5289d21311312be0fde45820e0a508ba5cbfd66bb88d64b7ec96bbf4065de637
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c1da2662d2df70b5cf7a9cbac9949d8be681b8e616a3e9f8bd8f573d3ae40e3
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cf4c5a1502c71922c0c95fafe0bced2fd2386a6dcb782663f81d80fe38eae27
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cabf8f804c392f7578109c0fc6aeb86b7f26cd200fd331d5756dd441870339
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058f762da3f1f0d383fcdfdbaa9c44ff0a75249cec5d1d061055f5c8bc46789d
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.849377453327179,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0279,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.02175198495388031,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0578,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.5898083448410034,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0414,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.41922903060913086,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0256,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.0018654951127246022,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0004,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.004117058124393225,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1077,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.047437746077775955,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0009,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 1.518896460533142,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.349,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.045308228582143784,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0889,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.11829675734043121,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0026,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.005238677840679884,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4929,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 0.2110355943441391,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.008,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.4381576776504517,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0896,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 16.31382942199707,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2398,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.05612191930413246,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0021,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.22438876330852509,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0076,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.866287350654602,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0595,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.045663584023714066,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0009,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.2734965682029724,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0132,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.5363508462905884,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0329,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.382310688495636,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0192,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 8.9566011428833,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.3838,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.7171825170516968,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0176,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.6939023733139038,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0231,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.0025089113041758537,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0026,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.06536436825990677,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.2801,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.20245826244354248,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0073,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.018265284597873688,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0028,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 0.13041305541992188,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1354,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.010534190572798252,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0962,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.2692118287086487,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0085,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.038933683186769485,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0019,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.018930355086922646,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0017,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.7976124286651611,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.0334,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.2943456470966339,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0089,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.12224985659122467,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0067,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.3579569458961487,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0677,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.02683216519653797,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0016,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.3340158462524414,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.2447,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.009762264788150787,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0083,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.024613451212644577,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0116,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.009135313332080841,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0008,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.2523867189884186,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4089,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.32133349776268005,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0093,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.09947264194488525,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0039,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.12009107321500778,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0069,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.007163307163864374,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0081,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.4114590883255005,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.066,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.04830743372440338,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0046,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.04225878044962883,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0019,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2743212791824384.0,
365
+ "train_loss": 0.0904453244805336,
366
+ "train_runtime": 221.9331,
367
+ "train_samples_per_second": 1.802,
368
+ "train_steps_per_second": 0.451
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2743212791824384.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be8024aa2eece99882c85b8046475baa83ad6283113525243ad4265e9bf7576
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce08ca256ac2358ea58f9bf1b6e1e689255df91a4f52dbeb33536a4dcc474533
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c586d85bfc4ed525de5cc0bcd8e9bb1def55fc41b70014035e84fbbdfc08625b
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d608fe1ce0b07b208d805224fc9aa2685c3edeca580c162648006f6425393528
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2072f36c89ed2416f35894c760f95c059d0947e40d2c851e6559e50ac1c610dd
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99426839dd4d5f67e775bb10a4c52b99e22e14e2de2c8b8d162e9059a49513cc
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8586e15be816067f89397fd74efde646b7f477c27e24148b917e3bfa10b0d3e
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd9c04cdc272cfff8bddd92cc280c2385a2797d1a2f3a196861109207940d86
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.3100306987762451,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3864,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 4.013766288757324,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9063,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 2.3978607654571533,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1349,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 4.109842300415039,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.825,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.427557945251465,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5722,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 4.734681606292725,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.0887,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.10447093099355698,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5304,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.675856828689575,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9177,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.6946303844451904,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4756,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.9363477230072021,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7104,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.7683027982711792,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1338,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 5.035861492156982,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9689,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.8543343544006348,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.523,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.667877435684204,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3602,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.0984058380126953,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5042,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.3304017782211304,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2943,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.7662222385406494,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.5895,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.6082299947738647,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1146,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.9511778354644775,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7031,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 3.04605770111084,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3958,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 3.8611412048339844,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8662,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 4.399799823760986,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.287,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 2.2531864643096924,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3218,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.4663535356521606,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0107,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.7521519660949707,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.124,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 8.896435737609863,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8135,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 2.5376131534576416,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.511,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 6.814590930938721,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.0585,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.1924495697021484,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1282,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.17645888030529022,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0467,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 2.022798538208008,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7755,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 3.513547897338867,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0105,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.7813096046447754,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.3313,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 14.315655708312988,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.5528,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.795628309249878,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.5101,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.2288278341293335,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.374,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 4.386161804199219,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.1973,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 3.3581271171569824,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.6263,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.225866436958313,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1825,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 7.104559898376465,
287
+ "learning_rate": 2e-05,
288
+ "loss": 1.1801,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 13.105498313903809,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.7541,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.2902798056602478,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0271,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 4.769597053527832,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5574,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 3.8983893394470215,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.3119,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.6488382816314697,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.0188,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.29925721883773804,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5366,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.2376351356506348,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.6949,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.63726806640625,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2721,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 5.418401718139648,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.4491,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.0275309085845947,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.5931,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5059805727162368.0,
365
+ "train_loss": 0.6651638150215149,
366
+ "train_runtime": 277.116,
367
+ "train_samples_per_second": 1.443,
368
+ "train_steps_per_second": 0.361
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5059805727162368.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57fec58061a92ab1b1bf4a4c6012fc1397a46c90b77813f7cdd58f7cb3fe2ed3
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3401ff987c93089fb123edc132faa3c3adae7422977093002adb8f8d9588d91
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac62e5e6fe65b335575f7ed7e9b1e3e428debec1fca2e10d9dc2123665b121b
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6901f0e6d410fbd830c0624606aa6c27c103266b25c9e6b95ab4a281cc6bfc6
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaaf2162f9a52ec84d0a5306af230ee2e2524d8b3ab4f9007c0eb79f88a34f95
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5e605e3d74782eebb64e65d6103a2c51abb8a7defe4b8c0cf0e4b3c764165a
3
+ size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2765067dfd32667733c18170c3f622d661e26844689a324651bb49e31c0531eb
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a34222b6b3c027b0a0631a8b22b43c25f1371bfd82e5daf1543cf536e3c8723
3
+ size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.537669658660889,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.832,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.7357145547866821,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6956,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.840099334716797,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1172,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 6.74650239944458,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2902,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 5.235438346862793,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.8775,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 7.184224605560303,
49
+ "learning_rate": 2e-05,
50
+ "loss": 2.0581,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 12.11132526397705,
56
+ "learning_rate": 2e-05,
57
+ "loss": 2.1586,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 5.101541042327881,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0109,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.515460014343262,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.7775,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.4222784042358398,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.935,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 4.021170139312744,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.8757,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.8799550533294678,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.809,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 0.9323534369468689,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4597,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.9174280166625977,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3155,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.026035785675049,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6032,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 8.995691299438477,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.9903,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.847169041633606,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3635,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 9.795331001281738,
133
+ "learning_rate": 2e-05,
134
+ "loss": 2.0643,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 1.7796157598495483,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4105,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 4.427263259887695,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8952,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 15.724753379821777,
154
+ "learning_rate": 2e-05,
155
+ "loss": 2.4695,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 6.148226261138916,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.2556,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 1.515079140663147,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4941,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 1.410451889038086,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2573,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 4.407503604888916,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8896,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 8.208944320678711,
189
+ "learning_rate": 2e-05,
190
+ "loss": 2.3883,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.5512627363204956,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.6002,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 5.655065059661865,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.9759,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 2.2757630348205566,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.481,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 9.758919715881348,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.1768,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 4.106739521026611,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7397,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 4.5539231300354,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7899,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.6534392833709717,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6942,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 4.095523357391357,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.1672,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 6.486148834228516,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.2393,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 8.842077255249023,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.7434,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 8.89773178100586,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.2772,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 18.22632598876953,
273
+ "learning_rate": 2e-05,
274
+ "loss": 3.3775,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.2898244857788086,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7762,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 7.303566932678223,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.6705,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 4.615819454193115,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4729,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.424020290374756,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6039,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.209282636642456,
308
+ "learning_rate": 2e-05,
309
+ "loss": 2.1192,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.9839006662368774,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9141,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.8658138513565063,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.9538,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 5.851518630981445,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.7171,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.3957180976867676,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.8128,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.1212002038955688,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3877,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.886902093887329,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.8257,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 3.7381937503814697,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.7051,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2779357248487424.0,
365
+ "train_loss": 1.0703019142150878,
366
+ "train_runtime": 215.987,
367
+ "train_samples_per_second": 1.852,
368
+ "train_steps_per_second": 0.463
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2779357248487424.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69d4760f0c1959606186ed7e2dbc53f991bd01e88e3a73b20618c9bcbd214386
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4447cf5a3e3ba4fc20c85906454383543302b54a45eb6230e015b355a7c620c
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c4e788867c89e82a865f5c263bfc27372f9ad1c131ee5f73ba1257cb1358df0
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4132f75da66ae6d2d1d4e9d126dcf9907beda51bc8637a5d299f80f47e1dc1b
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ae2b5822fb41da2f51fa255253ef5fe5e0742b6941f2e9d52243a28d00e24f
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea8bb07c77a2cfd707f77600756cadf906298b5c948248212f3d5d1787fded51
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d08d3691c5ba154f8ed2e0f222d7e52e43fe806519ae3751d1bec29a8d4105
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3655617efa0261b43cf077e8d338e5d7409043547ee972bf9bb851282182c3
3
+ size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.9793167114257812,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9603,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 2.5188374519348145,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.8116,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.3143935203552246,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6426,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.7295637130737305,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7914,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.7666940689086914,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5176,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.7987310886383057,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2534,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 2.515049695968628,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5228,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 4.360015869140625,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.2806,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.8536200523376465,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4718,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.595823287963867,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.5098,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 3.5519330501556396,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.5173,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.3497133255004883,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9349,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.930624008178711,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4916,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.4734158515930176,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.0893,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.4782814979553223,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8819,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 3.521637439727783,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.9765,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.3706670999526978,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4721,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 5.157536506652832,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.672,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 1.5482473373413086,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3766,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.12284517288208,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.2043,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.4150655269622803,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.9744,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.1841928958892822,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5246,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 5.618422985076904,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.2388,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.524454116821289,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5944,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.7722740173339844,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.4012,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.978190898895264,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.5566,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 3.1553709506988525,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.945,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.8509775400161743,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1817,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 3.637312173843384,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7817,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 5.024761199951172,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0443,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.6964753866195679,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.0813,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.6202216148376465,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8394,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.0826215744018555,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6085,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.6744186878204346,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.5914,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.6541945934295654,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.8268,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.257369041442871,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5257,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.468746542930603,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.5201,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.3300929069519043,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.035,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 3.127511739730835,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.8523,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.3533546924591064,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.3409,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 2.2223410606384277,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5418,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.9352030754089355,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.7653,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.8046193718910217,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.3562,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.5213556289672852,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.484,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.6822187900543213,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8862,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 5.5709662437438965,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.9033,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 4.302905082702637,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.0037,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.346940279006958,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.4615,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 1.4154908657073975,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.2501,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 7.068972587585449,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.4809,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5838906972962816.0,
365
+ "train_loss": 0.7595100402832031,
366
+ "train_runtime": 275.7767,
367
+ "train_samples_per_second": 1.45,
368
+ "train_steps_per_second": 0.363
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5838906972962816.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7061a373da978d284035594b21fa90359b7f63246265529bf3bbd4424d47ab77
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9f5a3d5256c4d387533a530ca53a00edf9a3dd5b539d359ef244972f191740
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d2247c87168cd25007195226e5a2d0cc8da9ff1ad46d1e494b8a8a64bc353b
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9f2bd53bd05c8e603b94af226a34333ecaf4dc93ab7e920e7ef6ffed5709e6
3
+ size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4f85fb161c9eb53205e83989ae34fdafca370bf153f995e1e6f4ac63320fdf
3
+ size 487635186