thkim0305 commited on
Commit
ff1404c
·
verified ·
1 Parent(s): ad2c972

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
  10. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
  19. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
  28. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
  37. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
  46. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fa789cff960ae4ee81fa9ac6a075adcf63e56a3ec6c3ecd5699d4f789f3c213
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432f8736e99ab8b1e39223df1d48f017f6fd4f4f9643ca359d4b99141e4f4d74
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c8fc312b07e6987f1125962ff398f8861f24936ce222a149189568327cac67
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051032624bbe04510f16be6fdc206a54f23b25aa871002ca00fef79182e7c513
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d675c4b87e3db30f58b3688f502ecdfd3b852d0a28b2d067cc72ca763b08cd5a
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381ce3ba71ec63c166839e10280a43a1e9f31dc4d7fa46bef9e6fa158c302e49
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e8733bb21c24aa296509cc99e8f4f9b3259d52e55bf918ee4c9f835985258f
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89fe3d8b4f7d27dbc7e4251a76c701fd44c4ad0c7fadf149e72d9c82c2e77db3
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 9.423497200012207,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9426,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 6.242947101593018,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4941,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 0.23292511701583862,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0161,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.2664003372192383,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0807,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 16.083553314208984,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.5403,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 13.203125,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.3032,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 9.931619644165039,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4531,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 12.197830200195312,
63
+ "learning_rate": 2e-05,
64
+ "loss": 2.0777,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 13.418411254882812,
70
+ "learning_rate": 2e-05,
71
+ "loss": 2.5869,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 7.835398197174072,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.2992,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.8039246201515198,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0811,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 7.014680862426758,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.1046,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 8.817255973815918,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8185,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.18149733543396,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1982,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.396234154701233,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.7965,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 9.15268325805664,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5244,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 5.599765777587891,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.686,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 11.052386283874512,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.5529,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.8186702728271484,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6692,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 4.434682846069336,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.5903,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.227272033691406,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2824,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 5.939194679260254,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.3968,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.5867003798484802,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.393,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 6.319277763366699,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.6232,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 2.46071195602417,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2416,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 3.9734086990356445,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.6859,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.4034255743026733,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.5754,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.3656154870986938,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1789,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.575096130371094,
210
+ "learning_rate": 2e-05,
211
+ "loss": 2.5298,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.9366658329963684,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.1539,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.2089002132415771,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.5569,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.383746862411499,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.3214,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.41840168833732605,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.2939,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.619239330291748,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6163,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.0113186836242676,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.5063,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.8169399499893188,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.3002,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 4.200991630554199,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.5329,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.631651520729065,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.6569,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.40286803245544434,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.0856,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.7753989696502686,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9605,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.5652052164077759,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.3553,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 5.572319984436035,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.0,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.4662155210971832,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.1469,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 2.5111005306243896,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9844,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 2.1454522609710693,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.3873,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.0339059829711914,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.2139,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 4.232385158538818,
336
+ "learning_rate": 2e-05,
337
+ "loss": 2.1749,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 1.8107025623321533,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.4807,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 4.099456787109375,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.5927,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.4381089210510254,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.2756,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2185214759534592.0,
365
+ "train_loss": 0.8263820886611939,
366
+ "train_runtime": 70.7407,
367
+ "train_samples_per_second": 5.654,
368
+ "train_steps_per_second": 1.414
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2185214759534592.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca00ad56c9180596acb64d02ed1c2c93b5167313f4ddacae17fe6151ab9c155
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5292459abad60fd048050fb5779edeaa54e68fd64f6d8f672e54415ffd378568
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c51878f3c0c54cdc54eb26bf35a2757735003b92f7da1689081ec193e89b87b
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95a9f87437d27a76a6cac22a0d23d1b90a201e43d7c601b4f2178ea763b5880d
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d126b766b20e0cad09d67d67cb29db4c25a011f24be748b251d8b5cf6288b90a
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ad85bffca2bf517e6a2294ecc419a949579749fae404a75f6f3de58d5c4be5
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1e47d59fb16d75680a1f95491ce48c1a01605d42bde715afab635cd1fd4fc4
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8eb669eede9faee34f088d7edbeb9c774a119de291ab6f2ebf3c071d1346dc1
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.06926380842924118,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0983,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.14224855601787567,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5132,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.1999995708465576,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3034,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.366185188293457,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0975,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.047930456697940826,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0054,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.5389022827148438,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1265,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.9087395668029785,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3968,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 0.5400022864341736,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.2126,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.17520087957382202,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0546,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.15257954597473145,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0091,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.7264464497566223,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1006,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 0.2980533242225647,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0133,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 0.862314760684967,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0667,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.18892519176006317,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0091,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.21099655330181122,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0126,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.5386524200439453,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0495,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.457860946655273,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.2528,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.006588024087250233,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0005,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.25489550828933716,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0122,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 1.0599042177200317,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0402,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.65377950668335,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.1825,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 5.81795072555542,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5606,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.06573299318552017,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0047,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.15902456641197205,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0069,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.09095041453838348,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0076,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.03288710489869118,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.1018,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.8101238012313843,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0885,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.018732983618974686,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0025,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 0.014856848865747452,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.177,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.006819794420152903,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0792,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.015503552742302418,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0177,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.02056262083351612,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0141,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.32354819774627686,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0082,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.28828832507133484,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.0255,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.011394195258617401,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.002,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.08238676935434341,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.004,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.0231951475143433,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0829,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.0049722520634531975,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0011,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.5577210187911987,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1744,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.060515161603689194,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0044,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.24229663610458374,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0266,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.003239632351323962,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0009,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.019305258989334106,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5092,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.03198835998773575,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0061,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.4420487582683563,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0412,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 8.323692321777344,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.6126,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.013138143345713615,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0034,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.41053083539009094,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.0296,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.00738116167485714,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0008,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.04163911193609238,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0028,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2202644449853440.0,
365
+ "train_loss": 0.10307437002658844,
366
+ "train_runtime": 69.5445,
367
+ "train_samples_per_second": 5.752,
368
+ "train_steps_per_second": 1.438
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2202644449853440.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:475ad4672ec3aaec5117fffb04c560d3ac5d8223d0494c2bc0036bf8aeeb4787
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2073eda13fa87d39360d6fd49065146220dc0190e1004c319eb392aee6a48546
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd0388c3619655d1858ead432f4146aae463cdfad671f284a6b5fdff3e9e119e
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57bc54c8c645bd8a7a7d98994d2b745350a66ffc36b63c1b3485be984ab5a62c
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53ef0ae39d317134427d6d59afb8c58f3a573b7b4c9b00a837033c99411b1a27
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333195ee6334bf482e3581bf003c5a1b2bdfbda17a5bce80a1c5e88263b3d4a6
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309ab816121588cffe22b87774005c82c50e3940c8226ade89e6c6edc2ed71f9
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4706bf42b3020bbe7065ab079de8559f0a6a271f8b90c7dc1a72ba00aaed5e7e
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.42672601342201233,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1273,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 3.3164169788360596,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0253,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.0701144933700562,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1081,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.5641934871673584,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.917,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.482414960861206,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4307,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 3.699124336242676,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.2775,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.19477766752243042,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2769,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 0.49098673462867737,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.227,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.419643402099609,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6207,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.067509412765503,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.6495,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.7661958336830139,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1506,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 4.098026752471924,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.6787,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.8598406314849854,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2822,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.6059610843658447,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4346,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.4412726163864136,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.2262,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.919790506362915,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3238,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.1158552169799805,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7178,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 1.9022879600524902,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2602,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 1.8349590301513672,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.679,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.065424919128418,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.0498,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.6241636276245117,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.824,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.375793218612671,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1406,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 4.631248950958252,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9059,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.9230762124061584,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3381,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 3.146935224533081,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3658,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.643314242362976,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4425,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.577388048171997,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.6525,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.5418776273727417,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.545,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.3298715353012085,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.2741,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.20194341242313385,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.5407,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.6116507053375244,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6341,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.1075984239578247,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.5064,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.6221591234207153,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.2389,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 1.730831503868103,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.353,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.8647536039352417,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.9568,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.6666922569274902,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.9053,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.4998483657836914,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0881,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.8337864279747009,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.3308,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.7664095759391785,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.144,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 3.212735176086426,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9583,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 4.194940567016602,
294
+ "learning_rate": 2e-05,
295
+ "loss": 2.3249,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.21294273436069489,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0391,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.9137557744979858,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.6782,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.2017444372177124,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5466,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.704470157623291,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.9585,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.09351445734500885,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.1403,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.1362736225128174,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.4254,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.6755289435386658,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2894,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.5850534439086914,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.7788,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.5499467849731445,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.16,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5196654105853952.0,
365
+ "train_loss": 0.6389807415008545,
366
+ "train_runtime": 132.4801,
367
+ "train_samples_per_second": 3.019,
368
+ "train_steps_per_second": 0.755
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5196654105853952.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3137dc1711718282da550f4845cd36baab7c46bbf11b76105eef0fead3e874c1
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defed3f7caba8d08a8444293265279252bac754f91960c09be90e83e3f42290f
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e379d1ddbbac4869c911e4aa2261e7e062f1759e7a20a5e7ffe738d3b4ece2
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611b55b9aafd1b4e11c27bcf80a22e33756eef1bb1c1de102cf91676dd074463
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5576200561a80efa43148ce2e7831d31afb0e4696dd074723a8f63eea37962a3
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b02587ad1dd9f38df651a6f97486fbd2a5654af5e637c8fcaba6350c37ba07
3
+ size 360880622
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594714875d0223582d0341af340f41258ef6b01e6633ed2f1b31835f1fbcb28f
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9acc6f735922a0d67535d40544015c6d4cd1d2d4a9d1702e6bd5541a29358dce
3
+ size 360880106
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 11.701602935791016,
14
+ "learning_rate": 2e-05,
15
+ "loss": 3.2041,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.8877856731414795,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.8482,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.9416866302490234,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.9832,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 8.076401710510254,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.7619,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 9.02495288848877,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.1751,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 6.70341682434082,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.7742,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 9.935924530029297,
56
+ "learning_rate": 2e-05,
57
+ "loss": 2.1147,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 6.356525421142578,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.8387,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.266024112701416,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4111,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 9.628840446472168,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1686,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 6.305365562438965,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5419,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 3.4979939460754395,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.4611,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.904814600944519,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5461,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 5.557872772216797,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4092,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.297398567199707,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5477,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 14.574881553649902,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.0394,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 2.348663806915283,
126
+ "learning_rate": 2e-05,
127
+ "loss": 2.8679,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 8.440045356750488,
133
+ "learning_rate": 2e-05,
134
+ "loss": 2.2365,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 6.058783531188965,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8523,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 5.3567728996276855,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1006,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 15.160209655761719,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.5179,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 10.718804359436035,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.9688,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.9706368446350098,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.554,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 4.072225093841553,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.1259,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 4.832661151885986,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.2787,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.9963531494140625,
189
+ "learning_rate": 2e-05,
190
+ "loss": 2.5343,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.30591192841529846,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.7795,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.4619483947753906,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.957,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.5531309843063354,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.753,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 3.0114293098449707,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.3273,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 2.4710679054260254,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9157,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.4386227130889893,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0244,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.135855197906494,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.8826,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.907819986343384,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.173,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 2.032712459564209,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.9032,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.388005495071411,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.9712,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 3.9849488735198975,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.2745,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 7.177330493927002,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.5408,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.1500422954559326,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.589,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 3.8292155265808105,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.8088,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.9888606667518616,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.1827,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.494753122329712,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.0098,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 1.3061927556991577,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.1736,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 1.5556581020355225,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.6252,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.5772370100021362,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.7691,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 4.533968448638916,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5719,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 2.4935195446014404,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7637,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.4782304763793945,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3888,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.847276210784912,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.1708,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 3.366889715194702,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.3215,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2232466265866240.0,
365
+ "train_loss": 1.15478422164917,
366
+ "train_runtime": 72.3437,
367
+ "train_samples_per_second": 5.529,
368
+ "train_steps_per_second": 1.382
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2232466265866240.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f65beb7bbd25136510c6187fc11e9ca55eaedcabba3b14033692495c02899c
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaeba29721fb62a98ac7f919a031d53e300c961e5a7541ddce722693348166e5
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7698a9fcb5af2885504255c07c54acce127c28f1a81ddf7f9850673e0719ef
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d090f3e1e20b1615cc8945e72b93472544277b7189ef83362446f27ba8a0c30
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591958c16e67ee2f5bf6f1fd0f51c4455c7585e8e45468af114daff5756e3268
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f300b973c4d938700cfca9fae9a52cf471ee38d054e0060f6d88b1dc551cfa20
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db0a6df166d5e9f5df8f18244a5ccc530772bebf1d2da9d366e863e05f2618d
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8104378821d6474f3a6c351716ffb4f1db29be362a71d37564a8be300b60419
3
+ size 778341034
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 0.5440601110458374,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3859,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 2.486060380935669,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.8744,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.5172414779663086,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.0364,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 1.87302565574646,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6833,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.2508922815322876,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5988,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.049877643585205,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2546,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.000075101852417,
56
+ "learning_rate": 2e-05,
57
+ "loss": 1.0645,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 4.159180641174316,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0189,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.9412723779678345,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5621,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.991361141204834,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.406,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.989998698234558,
84
+ "learning_rate": 2e-05,
85
+ "loss": 2.1827,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.5404460430145264,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9485,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 1.243625521659851,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4014,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.017669200897217,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.1306,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.773820638656616,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.9518,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 3.3499462604522705,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8406,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 1.1517057418823242,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.5119,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 3.779822826385498,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.2535,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 1.4071242809295654,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6125,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.478247880935669,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1099,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 2.338315725326538,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0911,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.6824209690093994,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.7209,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.2349324226379395,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9425,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.572331666946411,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.6688,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 2.569999933242798,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6548,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 0.5937522053718567,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.0816,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.9653764367103577,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.4465,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.6579816937446594,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.2058,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 1.3638180494308472,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7284,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 1.4289367198944092,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8351,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.7946614027023315,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.6298,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.661765694618225,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8354,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.3677399158477783,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0779,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.8689314723014832,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.532,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.7545336484909058,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.8644,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.1046079397201538,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4699,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.21095605194568634,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.593,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.4656115770339966,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3938,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.224704384803772,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.7324,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 1.486828088760376,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.5396,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.9084728956222534,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.554,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 0.9191360473632812,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6183,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.7869934439659119,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.3396,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.42631539702415466,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.401,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 1.4716026782989502,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.8724,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.6058681011199951,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5681,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 1.3189148902893066,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7323,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.9830902814865112,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6886,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.8136429190635681,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.2411,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 3.1793293952941895,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.0366,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 6021565481222144.0,
365
+ "train_loss": 0.7584884357452393,
366
+ "train_runtime": 150.4749,
367
+ "train_samples_per_second": 2.658,
368
+ "train_steps_per_second": 0.665
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 6021565481222144.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b224a628b3734d5758dbf6796aed6dd04a1b1d0d81b99063131a5f87f032729
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0b171c9c908292b73b092761beef17fde357b8526a9689a4c1a431bef82d5b
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef71ba45a93d892d931909dd81bac80459da02b0bf561f2c594f640c6c01992a
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b990fbebb884361e12c185520abbd6ce9ae62e5e4f4903c4863f11b8a265cc
3
+ size 778341886
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc9b6db8edd59dbbd02b347056aa744f332dd711a32e98dd04fd3259d4cf4c56
3
+ size 778341034