thkim0305 commited on
Commit
0d658b8
·
verified ·
1 Parent(s): b6876a4

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
  10. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
  19. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
  28. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
  37. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
  46. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d15399f0edd1f91b4a2f47beace1738d4cf8d5dfaf2b45b7305d79b9bbfb21d
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428d26a3c9c1a279a65ec5b0271ef9313bcd1f8fbe7136f6fb36e13a611444be
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05cb0bc452be8c9b338be41388d1f2439111df0c7b8092f1ba38101984a33837
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7b33073b2d4e01b918ce6d556504297b87242a7269ecd544dd4a98da48dc7b7
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f18e59a96390eb2de1672fb33aea5d2558b0f3efc4ddd9f390822d557c52c42
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba7ce9936a84aad1f88255fc48671eccf33c457c95b6877beedce99407c7636
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca8e4bf48ae2c4fadc9a0324efef2e5fec14ec7cfeeccc0ac29b6813d8bb1b1
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54258bf82a4d6ed0bdd02ff79c8dce338184fbab57262563baae4a52792eb45c
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 20.356369018554688,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.581,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 8.037093162536621,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3628,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 9.684240341186523,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.4963,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 13.099488258361816,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5486,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 24.806184768676758,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.2746,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 12.736031532287598,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.4477,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 8.170167922973633,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2535,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 6.432157039642334,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1452,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.9350321292877197,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4796,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 5.1196184158325195,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.5928,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 8.371356964111328,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5357,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 10.118005752563477,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5303,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 8.173060417175293,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.5055,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 8.020771980285645,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.2534,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 3.0385186672210693,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3931,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 6.484845161437988,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5293,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.5226542949676514,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1518,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 3.8682730197906494,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1667,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 15.682740211486816,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.9048,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.051661252975464,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.2347,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 4.373608112335205,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.8616,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.7194321155548096,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1839,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 8.273662567138672,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2419,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 4.200174808502197,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1707,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 4.162522315979004,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.4122,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2184907602264064.0,
190
+ "train_loss": 0.530304090976715,
191
+ "train_runtime": 112.0178,
192
+ "train_samples_per_second": 1.785,
193
+ "train_steps_per_second": 0.446
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2184907602264064.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e0290f254e1384dc1b37b17cd1a2fb8a4a8bf7a62e1995251d0c059fe3036a
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f460736d1f5f9bcd282f631bd6fb842444343cf3d88d95d4f800b704aa5da1
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01463999f5efe8d57300fee5391f52db0442384ea286c5d9d33b02743486a83d
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c68012f74213cbac7a5c1607e2e1e0f117a9c05b2885cc5b05407538fb77bc2f
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e3fce46215518ed15bbadbfadf2e2c58fe601c749ff93d6fcb857c43f5208f1
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4158de6adb3088e4e92a8742807ff01855816aa9185bd87b5850e634ea97bb34
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4152f5720d2ccf17af9d3d343bd5f35a6af271f91a7685a8af2139b6784fdfdc
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a2902e50a5d007c58fc2031cd980d500510dba91d1f562e637011185bec243
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 1.7138079404830933,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1539,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.025427162647247314,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0094,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 0.03731539845466614,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0288,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 4.670400619506836,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0634,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 0.008197980001568794,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0131,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 1.292601466178894,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0206,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 1.6769487857818604,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2863,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.16871647536754608,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0143,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 16.038557052612305,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2802,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 1.5586574077606201,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0268,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.052973054349422455,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.013,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 0.4838956296443939,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0118,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 2.2010490894317627,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0197,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.02648143470287323,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0095,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 0.015569723211228848,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1625,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 0.07271519303321838,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0134,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 12.36640739440918,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1975,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 0.011994317173957825,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0186,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 0.07830987125635147,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0099,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 0.245117649435997,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0106,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 0.09306483715772629,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0116,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 1.065843105316162,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0152,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 0.019034242257475853,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0096,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 0.014849173836410046,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0973,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 0.41532012820243835,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0126,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2203723937873920.0,
190
+ "train_loss": 0.060387180894613264,
191
+ "train_runtime": 113.8328,
192
+ "train_samples_per_second": 1.757,
193
+ "train_steps_per_second": 0.439
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2203723937873920.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e9d3fd73bbdcad52830e269e2f3893da64867291b0aee9a0e4955d33c75627
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a85b9f1aa53edaad8c7a91bca1a3c70f20eb871f8dd521b068458c7141fc76c
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a567a7777a1278ff501e2a4345b52e9eb9aeb5e0a651e43d5f8898fd830e16
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8291d8f62e0d8059d91b1002ed6f1e1830c888064a6e305bc691db4e3a7c5be
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d467059117666b4bb77ae3dfce1433137ba25364531e1f1648c6de90643de6cd
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86f50e0f4809ed2809088768d554284ed9990868438e53b93eed85ac993822a
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0199d3ec64caff6ce480c8099992a2541bbee8a3b53ba12c681a7a19ab123831
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b7717a34391c3b362458f2148b4270e4a5682bd69eda667d4aa4d3b9e4ad72e
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 8.67927360534668,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5073,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 1.070449709892273,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0556,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 5.557805061340332,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.179,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 5.5926971435546875,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4056,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 3.1736576557159424,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.137,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 8.38901424407959,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3255,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 2.2440390586853027,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4894,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 2.3557586669921875,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.424,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.672835350036621,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.272,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 2.5702593326568604,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.2785,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 0.3932490646839142,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0989,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 4.769493579864502,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.254,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 5.8169331550598145,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3336,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 0.9339334964752197,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.1087,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 4.087733745574951,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1163,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 7.105349063873291,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.7171,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 2.8530874252319336,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.2644,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 6.338785648345947,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5313,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 5.0571136474609375,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1923,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 7.593774795532227,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4613,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 11.923439979553223,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7395,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 0.8395790457725525,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0462,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 3.3043808937072754,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2629,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 9.646453857421875,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3714,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 4.387805461883545,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2219,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 5203228526379008.0,
190
+ "train_loss": 0.31173837661743165,
191
+ "train_runtime": 189.427,
192
+ "train_samples_per_second": 1.056,
193
+ "train_steps_per_second": 0.264
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 5203228526379008.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4954490d41567c600c766cb6260cf6575e0faa298da32365174a9be869b521
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e9244b1cfbf88b52befc18b724e7cd671e4eb407aabd8884118c9e9e5212dd
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6c22f2e8e6010de5e540eed7cd6d9544224ba58ebc34ec3b66db8fccdc0a6d
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67f6953201fb2a4dde70231b9bc251dcbcc1777c95f3625e0d02a3b94a895caa
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d843fd5a0b30e5d2423185c7f7c1087228c7ae93460fd1db50dff217bfa5ea9
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e46d20265302b91f3f24e0d48d23aeb7fa6a7185fc4e7b7ad9e7e02243bed549
3
+ size 360880622
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975b40293671bbdeb5d32200b2bd64f0890718dc69b9e7162e2fb26a36afe6a1
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d853326567172e3fdbcb84b8d3f5e7c1f022f8b7bc03a0334be7fcdcf3bab33
3
+ size 360880106
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 18.76683235168457,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0907,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 12.490086555480957,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.7188,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 15.25229549407959,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6253,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 22.94184684753418,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.2405,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 5.571078777313232,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.389,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 9.62364387512207,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.7317,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 7.308901309967041,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5968,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 4.2408223152160645,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5853,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 8.469420433044434,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6742,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 4.550015926361084,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4569,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 8.425732612609863,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.5865,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 10.20775318145752,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.4898,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 15.613037109375,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8773,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 7.8877410888671875,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.8102,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 11.263969421386719,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.8425,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 10.401885986328125,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.7032,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 8.376611709594727,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4214,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 3.5057554244995117,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.4259,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 14.423259735107422,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.474,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 5.601011753082275,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4506,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 8.93952465057373,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.795,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 7.401601314544678,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.4657,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 6.547579765319824,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.524,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 4.996316909790039,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.8237,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 14.771405220031738,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.9451,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 2233493086011392.0,
190
+ "train_loss": 0.6697625303268433,
191
+ "train_runtime": 113.9082,
192
+ "train_samples_per_second": 1.756,
193
+ "train_steps_per_second": 0.439
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 2233493086011392.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8f43e8703563b6486c90073e04b43b429fa3218145830d8a090d468ec437bd
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99730f58c467e3669a4d4713dd9b1451cff4b93a58ae46c46497eb8f731facfd
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6edd260c1245ae17fbbd8bdc254dc7d9c62b4151143c13392a97e053e4bc420f
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed393604bc7cd9f3ae87cd5c96a132a1a9ee3dba8c55b6a3a0aaa11c23731c2
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a06037709047bff4a95ef960ee58dbdfe9a84bcda95a019105db9e35084c5945
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21ba21d09b33136f5e652150ade2f8ff774b5b30eb0209d0e1b4eccfda6525f8
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ace8e0b834acde0cd7f0b3c99ab0c668c3019d4f8c61d1e257934541a3bb25
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d74bc1d4d6cd847082f58e5ab66e88b532413a1e07e096ea389507b3a50d0d
3
+ size 778341034
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 50,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08,
13
+ "grad_norm": 0.42625540494918823,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3228,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.16,
20
+ "grad_norm": 0.7395199537277222,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0953,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.24,
27
+ "grad_norm": 4.357167720794678,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.342,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.32,
34
+ "grad_norm": 3.030724048614502,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.453,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.4,
41
+ "grad_norm": 4.075497150421143,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2039,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.48,
48
+ "grad_norm": 3.159764528274536,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.3253,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.56,
55
+ "grad_norm": 4.061643123626709,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2518,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 0.8145078420639038,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4027,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.72,
69
+ "grad_norm": 2.392146348953247,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1145,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.8,
76
+ "grad_norm": 4.058164119720459,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.4689,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.88,
83
+ "grad_norm": 1.3680384159088135,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2133,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.96,
90
+ "grad_norm": 8.989691734313965,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.486,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.04,
97
+ "grad_norm": 8.30005931854248,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.6934,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.12,
104
+ "grad_norm": 12.295815467834473,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.7844,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.2,
111
+ "grad_norm": 4.061346530914307,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.2592,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.28,
118
+ "grad_norm": 4.065572738647461,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5118,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.36,
125
+ "grad_norm": 3.6354613304138184,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.472,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.44,
132
+ "grad_norm": 5.219471454620361,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.413,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.52,
139
+ "grad_norm": 4.679935932159424,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3027,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.6,
146
+ "grad_norm": 3.4444127082824707,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.4347,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.68,
153
+ "grad_norm": 2.349905252456665,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2921,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 1.76,
160
+ "grad_norm": 3.327099323272705,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2939,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 1.84,
167
+ "grad_norm": 2.9319257736206055,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.2592,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 1.92,
174
+ "grad_norm": 2.7675695419311523,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.4071,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 2.0,
181
+ "grad_norm": 7.027470588684082,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3092,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 2.0,
188
+ "step": 50,
189
+ "total_flos": 6019810878029824.0,
190
+ "train_loss": 0.3644888877868652,
191
+ "train_runtime": 192.4302,
192
+ "train_samples_per_second": 1.039,
193
+ "train_steps_per_second": 0.26
194
+ }
195
+ ],
196
+ "logging_steps": 2,
197
+ "max_steps": 50,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 1,
200
+ "save_steps": 500,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": false,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 6019810878029824.0,
214
+ "train_batch_size": 1,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c46f9b30ec1ddd0705e19e3a27e0ee5ae37bfc4c2d9a99b2d33be8dab34eab79
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bd3170badc104b1340132038527d6226113ca6eebce39a3f6699be2b0d1e46
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f97139241266520411df7976e108c2040ec519ca76fb4cb87b88ad7696cf6c6
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fe3a24e6bec05583d408402670efcf642e612602b2bff16363992dfcd4e812
3
+ size 778341886
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda87c5048819b2eb77ff1681af2797f1098905aae19565a8d5df6a3a59e038c
3
+ size 778341034