thkim0305 commited on
Commit
1545cb9
·
verified ·
1 Parent(s): fecc337

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
  10. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
  19. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
  28. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
  37. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
  46. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bf1bd922105b202c1332673b2650c3de98e3475fb786f5b8afb11368e5b6ecb
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7719e20ac179b3505bc16fe3d26557236f289d0aafd2a963fe447a9af6798b3
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c93ab2b8d890ac3fe7e1ea0de67b9123a0488d5bd7cb7109ea6c97edcf22c3d
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d18901eb260c181d30c1e1be564dd71da17a34f6bf6c333a89856341a4b907c
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85c6f07499b6d135cb2c16ce612f45f129974177654fe29012b34255fd4e71ec
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf6d5f3802665484ccd7c41c90f01c08c8eb716f3bee52a676c0a2ea8f4cafa
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8a5ad5233a4d504fc5a5368036b3332e83946a37160464ca568222cafaecdf
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766c6490fd2b03599de1b54601d6a6664ef9260808f2c2b8c964dff572c8a955
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 3.284128189086914,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0793,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 3.4005775451660156,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0915,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 1.777352213859558,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.043,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 4.783378601074219,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.8805,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 7.2065749168396,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.3896,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.6766301393508911,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6725,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 15.141353607177734,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6133,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 17.0766544342041,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.8163,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 9.064810752868652,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6379,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 11.561620712280273,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.3389,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 10.447920799255371,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1638,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 7.2391462326049805,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.4152,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 6.883174896240234,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.1845,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 2.764719247817993,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2519,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 6.025578498840332,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1062,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 12.170540809631348,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.6212,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 7.470730781555176,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1638,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 11.806479454040527,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8462,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 1.250801920890808,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1253,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 12.122519493103027,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3373,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 3.9410598278045654,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0798,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 3.405806064605713,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1707,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 29.996286392211914,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9968,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 5.970231533050537,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2291,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.9755194187164307,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2356,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2184626743279616.0,
190
+ "train_loss": 0.3796170651912689,
191
+ "train_runtime": 115.2896,
192
+ "train_samples_per_second": 1.735,
193
+ "train_steps_per_second": 0.434
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2184626743279616.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6011d6315d2580827738242cee8ef75d254b4cea2dd8679d0473b7a9db54cc9f
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad508db3f5914917b8e2316d601bde72f9dd11c3f02fda6fd948437630f223e8
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5a375f55d8fb1c9f9b8b9d71e2f41433471b22a3f71c7c0ece807ff8703d3b
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0dbffa1ada429cae02a98572d4793e4f33e169588a8884543c9edfa076d3016
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4840c34ce37f38ac86c58160329347c4f31b8a0d2ca8c6f126c58a50979f8da0
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a6e5dae1e8232e195fdc1aedc64fecf22d65266fc23ce9c8416e8884fb3793
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60bd84c9ce839d36a98d585b4eb739425ac0dda9019d51f2c95a11922d6e2fc0
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778f95e12407ad50b015c33069c9a39c1140bc5a2a856c0e9f7fff2badc47c7a
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 1.1501377820968628,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0192,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.30648428201675415,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0154,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 0.0933217778801918,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.1095,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 5.344268321990967,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0769,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 0.127670556306839,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0712,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 0.12052742391824722,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0189,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 0.04328983649611473,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0113,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.04012615233659744,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0542,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 0.18714116513729095,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0121,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 3.1324093341827393,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0418,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.05844057723879814,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0108,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 6.132259368896484,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.3944,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 0.0780622586607933,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0122,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 34.591758728027344,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6219,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 0.04694774001836777,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0107,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 0.03775114193558693,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0645,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 0.4576091468334198,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0172,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 3.4563567638397217,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0723,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 3.871311902999878,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1694,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.24964451789855957,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1908,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 0.06994156539440155,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0383,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 0.0806451290845871,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0183,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 4.901146411895752,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.1231,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 0.1762755960226059,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0135,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 5.6551384925842285,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0321,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2202126654636032.0,
190
+ "train_loss": 0.0887929368019104,
191
+ "train_runtime": 116.0987,
192
+ "train_samples_per_second": 1.723,
193
+ "train_steps_per_second": 0.431
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2202126654636032.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ac4e1058836b6234379b6e570fa3e71002444f037f47c23c6074c38371cca6
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c927ff8e1b3b4faaa34bde0c2692889ce1c086425706d7280a79dc051533dee
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95ac4a906febc91c5cdb3813cde6eb88cfe3bc22ba6fb8af96611171ca1a688
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e262c5e68c4341c4604cab13775efef4ca2fa56dcb00f1b4eb61a6aac7db31c
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86da56ef2fcb0770168449eb48fb8251868ad508baf093591f10e001ac71632
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00580e00a33f9296ac0a8c5158f4c03322b348cde61b769eaa5cd7738ec10606
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d7537fc2c48cf1786a1ee004cc1881ef12160aaa0d2cff908db07aa3085b32
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bbb7ca95f52ed9b394dbb71a9734c8caa2eb2cf282d47f60c1fa00311a262ed
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 3.62923264503479,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2634,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 2.1570146083831787,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.2137,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 4.34098482131958,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.5009,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 9.01052474975586,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2679,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 4.558963298797607,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6763,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 5.2403740882873535,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.631,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 0.49892091751098633,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.321,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 4.3842668533325195,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5071,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 3.5485029220581055,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2333,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 8.232368469238281,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4089,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 1.4529104232788086,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.265,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 3.8828446865081787,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2181,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 1.24514901638031,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3733,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 9.44221305847168,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4971,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 2.5544800758361816,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3479,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 7.6183319091796875,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2933,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 5.657953262329102,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.451,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 1.417629361152649,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1971,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 2.1960906982421875,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3496,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 4.147959232330322,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8169,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 1.6373600959777832,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2783,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.473294973373413,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2325,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 4.739127159118652,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3456,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 1.90013587474823,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1663,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 5.234428405761719,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3906,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5202848019120128.0,
190
+ "train_loss": 0.36984447479248045,
191
+ "train_runtime": 190.6891,
192
+ "train_samples_per_second": 1.049,
193
+ "train_steps_per_second": 0.262
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5202848019120128.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13906ad1f936437fd1ca3ff97f1ba801c71be2b6f272adbb73ae8ab17b6e26e
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38308af0d31e928983ecb45207689c09907a4de47c8986628f7604409ef92085
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a124f96018f2b50d800a1d7250ca46a03878d242a4c911ada9f11011f69b97
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fc1ef0d486b875990200b1dd094d84b2570ed11e53f6a9ee10f35d34f0f49ca
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4e079328106d0413c0e0836461aaaa74ddb45de0b53648b630bdc6a6632418
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7865e64171462c37128ea3d65f6aa1f8d3f8ad1fae133ed26642947ec64cc251
3
+ size 360880622
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4045eac0ab4825692e2c1f2596ed0574497bab56ebb98680bbfa7a3d0cc80c1
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db6ddaba6447528d3500e93c52fc8187a5fd82d3824af797deebd9999a4d9ac
3
+ size 360880106
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 7.067582607269287,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3825,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 4.283020973205566,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.2845,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 6.822042465209961,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.7916,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 9.666316986083984,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3412,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 16.465579986572266,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.8459,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 6.560152053833008,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6335,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 4.862478733062744,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6741,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 7.447726249694824,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5064,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 7.4205098152160645,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9029,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 15.627077102661133,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8091,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 16.306581497192383,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7351,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.587651252746582,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.1841,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 19.92629623413086,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.5481,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 3.447114944458008,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3892,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 14.376614570617676,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6816,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 19.715835571289062,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.6575,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 17.579479217529297,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8478,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 8.443402290344238,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.2789,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 2.802513599395752,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3138,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 8.755653381347656,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7962,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 11.076048851013184,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6601,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 9.736947059631348,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.9945,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 9.903741836547852,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.6868,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 26.1480655670166,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0493,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 10.546011924743652,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.8659,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2231176135704576.0,
190
+ "train_loss": 0.6744278335571289,
191
+ "train_runtime": 114.3015,
192
+ "train_samples_per_second": 1.75,
193
+ "train_steps_per_second": 0.437
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2231176135704576.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169b161921bb01bf190d8f33849b33f9c52e7d430415f5e5a702607ff51d21b0
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f3654c2de0c19271419c744626b570b8ee010b77749be74aa4c396bf6a43bc
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f9067a5bd4fbe9beae979d71170b2c1e372c67acd5023c3e986430948e2719
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b889c9885f7c81ae50f155aebf41c60c283c525132b24699311c6c6f7f12500a
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7771d615a3c16f065855527c013f6e19250c242746206ff6b6a409e79a45f3
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c87725e139a42e875b6819300a7f140a858cd2d2db35b382faba6ce53d8e3d7
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a336edf31a143a41bf64944c4e3b5afa7f26ca65509fd134b08f280f6dfc3e35
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ab51c825d382c8d805b7a1989808541ebe02284dfc053fd0859047835f96957
3
+ size 778341034
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 2.316213369369507,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.4539,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 3.5787057876586914,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5031,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 3.0300209522247314,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.514,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 3.986267328262329,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4123,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 5.813876152038574,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5409,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.9155079126358032,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3129,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 1.6175196170806885,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2733,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 3.6581971645355225,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.3098,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 3.621964931488037,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4081,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 2.364119052886963,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1815,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 4.366921424865723,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7291,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 6.259946823120117,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5821,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 3.531437635421753,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5318,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 9.066308975219727,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.9163,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 3.3480992317199707,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.4263,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 2.767130136489868,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2811,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 5.080353736877441,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3363,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 1.7336921691894531,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5843,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 2.0092973709106445,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4642,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.8756134510040283,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4807,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 2.4722533226013184,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.3807,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.6810188293457031,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.3137,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 3.1720468997955322,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4738,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 1.5610110759735107,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2826,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 2.7629005908966064,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.554,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 6020445073571840.0,
190
+ "train_loss": 0.4498666000366211,
191
+ "train_runtime": 191.3091,
192
+ "train_samples_per_second": 1.045,
193
+ "train_steps_per_second": 0.261
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 6020445073571840.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ff9214659409561dfbaa016ea376607a875853b6d1a4c0b5696d32eaa6435b
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d40603d57ef7a099d0a49f40d6c03d0849f0e70990ada3d6564746d8a047708
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d185e922421d0f321483a04cf9ab247f33a88bae7fe6478e14d587ec62b615
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7676ca765d23fc6a1f0a8ab3064c4017ae1ec8dccfe4446023c38f944052a7f
3
+ size 778341886
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e47f376b87f03095f830e87fa9eb4f84f5a0695a2984230753c25d7ad5a96b
3
+ size 778341034