thkim0305 commited on
Commit
89af0dc
·
verified ·
1 Parent(s): 653cc5e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
  10. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
  19. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
  28. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
  37. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
  46. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7187654c3b2b4a311de981775f06ff9aaec06521ba6f2d55fe0cca74a083b1b8
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb851df31a265321fab4c368129ff8bdfbcfb7e63e72bb40c7b878badc6ba4d
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0571775d806cb5694d12bbbb59deb5715b149b625563380d3b53f0b2c4a0e406
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11bb9051b5c160af184c1a503e926ded05fb4faa720a3b2ac735df443714d0b7
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:242baa74ecd2e89b8949e441e1eb503db5855524efbfd97bc9c9e937ad7f281a
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f97f86483164a291e4dfb7b1b7323fe209e49380184a1ab9edb5093a58c2c2
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50eb45b2ed832ed4563d161d284a70b3fd463cd5d377c0fe903be291076a7133
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:928ec6d986859b197e14f18c3d237108b5a867442d77ef59f22e5e74a1481437
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 17.835098266601562,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2723,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 5.935124397277832,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3235,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 11.949447631835938,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6892,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 4.54430627822876,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5368,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 16.603609085083008,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.9215,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 6.623965263366699,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.798,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 5.242029666900635,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3287,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 3.1021950244903564,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1253,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 4.434370517730713,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5437,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.40633225440979,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4146,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 5.0599894523620605,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4193,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 3.8830926418304443,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.4138,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 4.712460517883301,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3751,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.2360763549804688,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1469,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 2.5379559993743896,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.4358,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 8.039641380310059,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3871,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 0.877090334892273,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1099,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 2.0384771823883057,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2192,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 11.140633583068848,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.8123,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 6.476287364959717,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3829,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 3.3182876110076904,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6873,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 2.2434990406036377,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2576,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 5.116442680358887,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2816,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 0.7951710820198059,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3147,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.8207076191902161,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3024,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5262842336903168.0,
190
+ "train_loss": 0.4599846029281616,
191
+ "train_runtime": 188.7148,
192
+ "train_samples_per_second": 1.06,
193
+ "train_steps_per_second": 0.265
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5262842336903168.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a99edaae05995e51b6df8fbbe76020d1d486ec42021f513ad5178460e3caf352
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea5209a560b65ca173a1db13c7882797c188b5f65cd986de2c86f36a11b818b
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b6bd292c09e6962198a402a9c211d7fac264c0dbd9a6790e5fd52fed5cda0ea
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15284a64a6f8f75bd6b4569811f3589ea26f5f4201e28360866fdc2e53febe13
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5711178c5463f7ab46aa53c7536f6313fea20d64a6ab19d533937da9107ba298
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e45fd3aced541c26cc0e326088f986e7d84f3d20c3f8178f1de7b27303044c
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4aaa54fadfb0db8bc6921f83398260e95c0c7a12df124a447f18c8a99c608c7
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf21825f399573276b614a5a9d8af30444b25aa7421f5be03a7df223b109e28
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.446603924036026,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1359,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.10891813039779663,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0153,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 0.08321043103933334,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0524,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 1.2470159530639648,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0364,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 0.0992945060133934,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.082,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.0286638736724854,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0263,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 1.4307641983032227,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0744,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.12711121141910553,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0168,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 1.582565426826477,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0624,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.4729337692260742,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.048,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.009418491274118423,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0155,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 0.010489478707313538,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0151,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 1.960877537727356,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0259,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.009839626960456371,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.015,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 0.08140977472066879,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0953,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 0.023913130164146423,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0145,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.580852746963501,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1049,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 0.009584111161530018,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0209,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 0.00926352571696043,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0158,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.022018805146217346,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0144,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 0.022947989404201508,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0272,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 0.031829629093408585,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0144,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 0.00818221177905798,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0142,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 0.017826130613684654,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1101,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.02396584488451481,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0145,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5308165734268928.0,
190
+ "train_loss": 0.042700646072626115,
191
+ "train_runtime": 188.7069,
192
+ "train_samples_per_second": 1.06,
193
+ "train_steps_per_second": 0.265
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5308165734268928.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a678ec3a1eebf6cd19b4a3a86fcfa17a0a3d1bff7e4b323c0e9b39aab849835d
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:002c0fd0d45d95aab8737555c4d1ab0d6d7268708d58af0a97d03cefe871a06a
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:046b63fd84fdf6ddc23d19f4dfe01cc64d7136a420c811c5306034ed8dff2828
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776b2874adc5a806bfa2f0a2f117b8262c20d7fe6fd49bc20e85d61f785f03a0
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc57a934def0e07593f099a111cf66256aa931634f2b19e1d7ab37987939278
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a554f530ff8c965c942e193a07af99983d5d0c26ef275eff9d868203f414b78
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d095e74a0ace44252b160479986b0e2eee7fa4ca6ca4e8022714bf37b8d5a571
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:709a8cae47012ee03fc01a7f0662490660e0669cdaf3b30fd4faa18163209de5
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 7.620423316955566,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.469,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 1.0009398460388184,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0558,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 7.3740949630737305,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.2361,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 5.340544700622559,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.375,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 3.0642282962799072,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1286,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 8.644904136657715,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3327,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 2.4434633255004883,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.492,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 2.4134719371795654,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.452,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.4050092697143555,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2468,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 2.544926643371582,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.2707,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.4770296514034271,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.094,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.732265949249268,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2453,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 8.252790451049805,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.363,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.9622191786766052,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1234,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 4.3495564460754395,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1236,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 6.503274440765381,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.7048,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.046657085418701,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.2559,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 6.2532219886779785,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.4904,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 5.003266334533691,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1991,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 7.434990406036377,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4284,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 11.749560356140137,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7576,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 0.8847981095314026,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0488,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 3.2899725437164307,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2683,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 10.162389755249023,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3719,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 3.9631645679473877,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2006,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5203228526379008.0,
190
+ "train_loss": 0.30934857606887817,
191
+ "train_runtime": 194.6247,
192
+ "train_samples_per_second": 1.028,
193
+ "train_steps_per_second": 0.257
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5203228526379008.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fbb9b3083f4adea56fa40d73ef2d276d34dd1141a1e0b879214fa5297f2a67e
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d2bbd2320db1c97e9b8c19641cc4013bd3bdbcb441849f2f5b71259205ef67
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cf9bee6f3298e1ddf6509dc4d30689d7f14c53515c656a5938157a99ce5038
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847972a0a4cc0969f6079db8ca3e297823e60fdf891791df893b8be17a649f6d
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b735d618d85e4fcdf832aa2fa46fc1704052fa38a48cfffefe5432589d28adf
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:668b35e725756d711bf9548304118bdff091fc57487f08deec840e08de74ef0a
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff73fbd2d92a8f6232c87ae094040bfb7e7ca7e47656fdcd9bce8f5823f4092
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bbb12f3d11eb2d3337fc2eddfec53dc06daec64b872efa8fc71743ee8239c95
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 12.495404243469238,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.779,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 6.629724502563477,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 7.101356029510498,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.7022,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 7.535829067230225,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.9247,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 2.729475259780884,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.4091,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 8.636240005493164,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8681,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 8.66524600982666,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.7492,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 3.2647829055786133,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4108,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 5.925101280212402,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6516,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 3.2308082580566406,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4492,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 4.235790729522705,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6326,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.701728343963623,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.3093,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 8.327908515930176,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5139,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 4.828310966491699,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6246,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 7.388039588928223,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8027,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 6.946700096130371,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5852,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 0.9494185447692871,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3423,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 4.723056316375732,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6249,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 3.943333148956299,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2556,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 2.8990371227264404,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3113,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 5.895607948303223,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.687,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 2.503542184829712,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5024,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 6.042721748352051,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4905,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 1.758901834487915,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7268,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 4.603387355804443,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2878,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5379871421235200.0,
190
+ "train_loss": 0.5656331896781921,
191
+ "train_runtime": 188.8767,
192
+ "train_samples_per_second": 1.059,
193
+ "train_steps_per_second": 0.265
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5379871421235200.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb08e16fa47f8165cdd04de2f96bf67351d5afb5e26d3062daaca4add4cacb6e
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2c7a3ebf6b75ad8d563055649dd9a3c67c241c89d20f465d03c9cea0290283e
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93c7d386f0503d5012cab1569ee8733707f70dba8032da855daf25e67911039b
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e4be141eba949984993e2c890086131a2d212ae51629882e4cb2e8a7ed944d
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b853c0ef95591cbd7c88b47800c735ac8c4fda60dfd5924f47714cf9d0b9f334
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0114a706b3459ce20699f3ed2de459d76ff071595681598c417000763c3845c
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6038955452646355952c1af52aa0938ec96a5155c97e81eeebbf3385676f6e3d
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dce5d8b262c68011e3e8b47d0940963239afc7c98986a7b48601fd1f7483f41
3
+ size 778341034
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.3974245488643646,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.319,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.6956784725189209,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0874,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 4.481583118438721,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3417,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 2.996901512145996,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4057,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 3.626215696334839,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1893,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 3.7142386436462402,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3213,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 3.844820737838745,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.252,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.8332847952842712,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4195,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.419525623321533,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1124,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 4.048295497894287,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4657,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 1.1390035152435303,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2091,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 9.533462524414062,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5139,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 8.884787559509277,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.72,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 11.820172309875488,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.7309,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 3.786125421524048,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.2382,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 4.292237758636475,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.516,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 4.905089378356934,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4936,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 4.8835859298706055,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.425,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 5.083212375640869,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3309,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.6019129753112793,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4558,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 2.5178937911987305,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.3123,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 3.1368069648742676,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2818,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 2.9503908157348633,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2565,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 3.0946178436279297,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4001,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 7.015686988830566,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3228,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 6019810878029824.0,
190
+ "train_loss": 0.36482719421386717,
191
+ "train_runtime": 190.5862,
192
+ "train_samples_per_second": 1.049,
193
+ "train_steps_per_second": 0.262
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 6019810878029824.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4632c0a7954ad392899f25b025e91ff1a12fb835b79f8cc033da3d88b2763a89
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af08c046b78b082c9a7a6c1b948e2ffa1b866fcde980f8adbe1e06e2b3e767bb
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae2b9cf0d05969ee3ed4f5adde29f1d459afe52b86afd14e56335252e22841e
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6afa1969e53cc2e368fc21263bc63819b02f23c15ed64631e61c910144d2eae9
3
+ size 778341886
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787b721dcc19ec1b987482642be00562f9b7c59cce7f7870cd1d113cab978fc0
3
+ size 778341034