thkim0305 commited on
Commit
b069aa5
·
verified ·
1 Parent(s): 54b6f9d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
  10. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
  19. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
  28. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
  37. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
  46. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe1dbc65f417209d09344d4e4995df5d78c5eef26666b716f886d90d26de53b
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0166898c5d59908d3d4e636b1e8705ab438946e36b7f214e4c120674652f0647
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748f75b79de639a2a4af53a70cce66a8ca2082d031f11a4e62a32187924c4242
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf98a0fa6bcb6b3f4639a9d3ce8d0d4bc9f306fc5c94fe909c335ed5961f3391
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d679df9ca797825f3a82c1c09077f75eeace1c8de08ab30c5fa922069db709e1
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b197e79ebb6d8ae64ec6739b5cbf3cb2c42e1e175ddd75ee657f36a5e8f23cd
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc8ee78f3b179428383f77c08c7e1005b657efa43a78e8a1acbfab15a6f4244
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b0c72c61440e9ae802fcad9652d631f5f56aa289e047e253f7ee81d8019dfe
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.7212343215942383,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0439,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 11.383496284484863,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5268,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 0.927216112613678,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.337,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 3.203252077102661,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3553,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 0.08366145938634872,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0917,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 6.82559061050415,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6837,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 1.8658998012542725,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0646,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 1.4391404390335083,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0581,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 1.4982571601867676,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3933,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.6830452680587769,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1075,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.245327427983284,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0312,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 10.72057056427002,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.38,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 1.239396095275879,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0602,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.4094916880130768,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0428,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 1.428717851638794,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6015,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 0.11368861049413681,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1259,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 1.6821730136871338,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1679,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 7.368831157684326,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.3051,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 4.640942096710205,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1196,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.3502234220504761,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1621,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 2.8279569149017334,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.4888,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 0.6382105946540833,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1357,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 0.40106961131095886,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3606,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 5.146971702575684,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1506,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.3846442997455597,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0316,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5266647493378048.0,
190
+ "train_loss": 0.23302085906267167,
191
+ "train_runtime": 198.109,
192
+ "train_samples_per_second": 1.01,
193
+ "train_steps_per_second": 0.252
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5266647493378048.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2aec2a71a2298864fb9cb71bbe157edfe31cb48c8758ed137adf555b57706c
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90b0271eee7daa4e6cf36df93827f4dd84b51c02fbc7eeb354a4584bc9eecc1c
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c319587b2f08331b839f2419acb0e4bfacc9c57991286ab2a77ea96d9feb5cbc
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bb6d41e065f1080f20435a320c1edfe1f3c8cba9f031e54d7610b9e367f8a9
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f72227369a7df43e68a0bb4081030172e220490ef080b56abc68a7bdce46ea6
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a435b97115a2feb785df092f369b70fa82f9fb72cf73f47bd559d40fa9b5a2a
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0390a2236b53f82449b5243e68ffc4c185e44bc9517abac645cbedc5678052c9
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecfbee4594e86c27822bb6a0d822dc075ba4815540812516b3e99f0069448cce
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.2633078396320343,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0962,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 18.888702392578125,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.2108,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 5.582844257354736,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.401,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 0.3722727596759796,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0165,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 0.6710987687110901,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0204,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 0.009970537386834621,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0149,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 0.994692862033844,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0285,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.025523852556943893,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.015,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 0.12230795621871948,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1153,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 0.009217753075063229,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0512,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 2.2184362411499023,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1586,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 7.529627323150635,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.164,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 1.4156068563461304,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.1002,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.2617127001285553,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0166,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 0.015605290420353413,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0145,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 0.057093504816293716,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.015,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 0.13609035313129425,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0151,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 0.03114943951368332,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1649,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 0.026129912585020065,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0145,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.42378631234169006,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0229,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 1.1538219451904297,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0286,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.1634211540222168,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0363,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 0.014446967281401157,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0142,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 0.0895637720823288,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0152,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.36212360858917236,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0169,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5303430465716224.0,
190
+ "train_loss": 0.07068853974342346,
191
+ "train_runtime": 198.2611,
192
+ "train_samples_per_second": 1.009,
193
+ "train_steps_per_second": 0.252
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5303430465716224.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc38d3fec8aaf304a73dc0465221009621d12e35fe11437da6cdd8c90b9648e8
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4af31ee51a64d1dc941abff726f2d86b9aed76733dbd2124a460acb8f7fc752
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6760ac39e030ae9fa6ebc136bdc504c2239d72745f5b973ed81a1e0decdfc048
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e5eebbd87377d275d69626cccc34474e06e8611ca75b57099b23a759cb721b7
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1198bde519b9e1569c1f5d34b7cd75ade0f1ccdede78bf1e1d666c2185198bf0
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db7977fb93fec26d6d524d6bb78a926367418764df5ead8d7437c724c9d30d9
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e377ed9ce274f12fa63914553e2b4dd7b85b45fcaba55bde1fefdb0aee6e0f
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154985bd935d4f05b0381b48c6606a5ef06bdc0053cdc7537f2dc22e5dc37771
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 3.617075204849243,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1032,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.15696358680725098,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.1814,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 5.014780521392822,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.761,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 9.337811470031738,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.8625,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 6.394294261932373,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6403,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.694501519203186,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2702,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 27.820241928100586,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3551,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 2.758286476135254,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4164,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 1.9265002012252808,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2844,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 3.5484347343444824,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3019,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 2.66292405128479,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2797,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 3.7674098014831543,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.404,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 7.080770969390869,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4917,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.9330906867980957,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3915,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 3.2564377784729004,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.242,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 1.0977956056594849,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0854,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 4.018260478973389,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.5377,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 9.991900444030762,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5558,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 1.1954278945922852,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.6506,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.9688401818275452,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1332,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 5.815992832183838,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.3631,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 2.7181556224823,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2599,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 4.442654132843018,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.191,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 7.31368350982666,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4697,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 7.1462225914001465,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3045,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5195491428007936.0,
190
+ "train_loss": 0.42144030570983887,
191
+ "train_runtime": 199.465,
192
+ "train_samples_per_second": 1.003,
193
+ "train_steps_per_second": 0.251
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5195491428007936.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7fc4f7efb25926bfa826673aa01335f815c127661a07f3cbb64a95c8fdffb40
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b399960eea88ae61a0a1aa3dfd3c1813abcdad91c1b4224ea8933067755a06
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89943c600b2b161ec6014448b900c8adf846847c4b9f68ffbf21de31f5e5d32c
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5699250baf94f3019eaeb1ef41db0f18f6dfd3e0c3bd0a112b411bd4170e0eb1
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472fbc1f43c917af0bb1f5e94c2286225b782951b640924fbaa37c723473a9ad
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c0e5b108c6b221edf19db70fe14233bf9b9b7422803f13e1c304e11f6e5911a
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c149fa6c092ba05bb37700e485f6699abc82de896528610d1635a195fc9c7a0
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59d5ce425a0645c183f6974a7833ce97c3e20569b720d8dec7176fff0fd0186
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 8.167108535766602,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.7666,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.970125138759613,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4831,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 6.168637275695801,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3263,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 4.275420188903809,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4419,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 7.258407115936279,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7066,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 5.875009059906006,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8123,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 3.180713653564453,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2229,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 1.822489619255066,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.245,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 8.324261665344238,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6389,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 4.086201190948486,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1874,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 4.469182968139648,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6376,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 9.68209171295166,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.4976,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 15.532666206359863,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.6447,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 8.273395538330078,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3491,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 7.468406677246094,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6633,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 8.63184642791748,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8022,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 12.6749267578125,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.7509,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 4.348174571990967,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2364,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 4.658010005950928,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.5582,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.064706802368164,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7061,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 15.28968334197998,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.1366,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 4.18186092376709,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.3874,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 2.199023723602295,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4409,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 7.054344177246094,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9599,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 2.990053653717041,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3781,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5376319969820672.0,
190
+ "train_loss": 0.599202663898468,
191
+ "train_runtime": 198.5307,
192
+ "train_samples_per_second": 1.007,
193
+ "train_steps_per_second": 0.252
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5376319969820672.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9eb75c8782194720b59887465c995e11d72f78c8aca656c42c9b469a59124a9
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39a4efe80d568e273b5a623ab5c108b7e27d937316250436a587da562ac3d86
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f0af7ab102fbed3a844c53a4f7dcc5f21677451244983cab0caf1c747d76f5
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18987b20b7bbc6a88d60b2b91648ad3a08b577918fa565e2278a750aee69cc6d
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd496e40f9341b403fe665f4a3f2261ae72e39294e7a4678261659e562606ce
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8d9e327003fd39d6bdcc0eabd84abc83f519cb69d6abde49f8d1544c15881a6
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d90208da0627322e15237fe2b0bc8fccf40d8472da50b806d3941b4128ce6b8
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06dc84e7ab1330bf113bc7de877ec9cedc283cb04d0aa6de93816d56ad6d6cbe
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 4.40165376663208,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3787,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 3.2695400714874268,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6385,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 2.2764341831207275,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3251,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 2.679776191711426,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3256,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 2.267791748046875,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2853,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 3.7516863346099854,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3355,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 4.205787181854248,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3998,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 6.004105567932129,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.371,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 4.294463157653809,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4596,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 3.1517653465270996,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.403,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 3.2551352977752686,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3488,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 2.0919487476348877,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5908,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 2.9000701904296875,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.4376,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.4982464909553528,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2203,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 3.5159378051757812,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.4652,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 1.462558627128601,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2046,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.924109935760498,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8077,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 3.89208722114563,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.3069,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 1.9117887020111084,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2479,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 4.854358673095703,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3298,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 4.46043586730957,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2984,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 7.219895362854004,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6064,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 1.80936861038208,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2453,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 2.554882287979126,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1651,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 3.7274348735809326,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.4726,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 6021332957396992.0,
190
+ "train_loss": 0.38677083015441893,
191
+ "train_runtime": 198.1718,
192
+ "train_samples_per_second": 1.009,
193
+ "train_steps_per_second": 0.252
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 6021332957396992.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0509b8ce6cd3ed751cfef1e396c7179a3f9088099d5cd2cb5b457b319dcebb
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f467a053337fecfa0f847cbc238ac4d4c5e851c9947c49765a915ebdf2da5f08
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a9446baf72a937c51e8e31dc17acd86d9cf2e6189c9082f6d32dc45a43491c
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7becdb1ad086320eb862ea9500d9bc7b91dd4e237a2a94fcd3b992d0c97c116b
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ec73292fd4cb63ea34768525c152e88f9998ee0c947f7d860eae16ddd53f7c
3
+ size 778341034