thkim0305 commited on
Commit
c563caf
·
verified ·
1 Parent(s): f63be40

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json +189 -0
  10. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json +189 -0
  19. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json +189 -0
  28. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json +189 -0
  37. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json +189 -0
  46. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f06a89dfcac5961ac5bdb293a2a3f574fcb9216e4593500ab6c2155ff3df0313
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe982dd9d5a3ae03280e3447d8063dcc43629d7dca6b8e90b03907c76b90a58
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aedba7096d5b5406fd46033b0b93b844abbe3d7454189a2035107d0dbb00caaa
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0526023fc6ccb898eb90c77e6bdc99b569624d6470ad5b920a20e689c289ad2b
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a263e7e7ee03bfcf87feac35953e8aa2fa2ce0c0e3c56c5dffbcf6c8a7d52721
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e10816fae9b78236f55f93f29814d10a19539fd654a0c3e9737cbd47cd8d9d
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4335cc459cd1552cda05f02b928ed4417cd0d799ff11e7ef4d08a16e0b8484
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c206007eb99dbc9905215c8f5443524bbcf2827728a8be2f97d0edee647ca575
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09302325581395349,
13
+ "grad_norm": 2.9576117992401123,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2411,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.18604651162790697,
20
+ "grad_norm": 1.2031710147857666,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0578,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.27906976744186046,
27
+ "grad_norm": 2.9653682708740234,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3511,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.37209302325581395,
34
+ "grad_norm": 4.665686130523682,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.1305,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.46511627906976744,
41
+ "grad_norm": 2.068437337875366,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1576,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.5581395348837209,
48
+ "grad_norm": 0.7609916925430298,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1547,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.6511627906976745,
55
+ "grad_norm": 0.8752633333206177,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2439,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.7441860465116279,
62
+ "grad_norm": 0.7228463292121887,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0869,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.8372093023255814,
69
+ "grad_norm": 0.5220616459846497,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1898,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.9302325581395349,
76
+ "grad_norm": 0.5391299724578857,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0884,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 1.0232558139534884,
83
+ "grad_norm": 0.4390765428543091,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.038,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 1.1162790697674418,
90
+ "grad_norm": 4.568640232086182,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2474,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.2093023255813953,
97
+ "grad_norm": 4.5196356773376465,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3904,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.302325581395349,
104
+ "grad_norm": 0.23186014592647552,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.101,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.3953488372093024,
111
+ "grad_norm": 2.5624356269836426,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1768,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.4883720930232558,
118
+ "grad_norm": 0.4649677872657776,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0719,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.5813953488372092,
125
+ "grad_norm": 0.7176182270050049,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1396,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.6744186046511629,
132
+ "grad_norm": 2.2132463455200195,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.087,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.7674418604651163,
139
+ "grad_norm": 1.706496238708496,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2997,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.8604651162790697,
146
+ "grad_norm": 0.48359882831573486,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0237,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.9534883720930232,
153
+ "grad_norm": 0.40984606742858887,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0393,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 2.0,
160
+ "step": 43,
161
+ "total_flos": 4777205812953088.0,
162
+ "train_loss": 0.15616962521575217,
163
+ "train_runtime": 217.1153,
164
+ "train_samples_per_second": 0.792,
165
+ "train_steps_per_second": 0.198
166
+ }
167
+ ],
168
+ "logging_steps": 2,
169
+ "max_steps": 43,
170
+ "num_input_tokens_seen": 0,
171
+ "num_train_epochs": 1,
172
+ "save_steps": 500,
173
+ "stateful_callbacks": {
174
+ "TrainerControl": {
175
+ "args": {
176
+ "should_epoch_stop": false,
177
+ "should_evaluate": false,
178
+ "should_log": false,
179
+ "should_save": false,
180
+ "should_training_stop": false
181
+ },
182
+ "attributes": {}
183
+ }
184
+ },
185
+ "total_flos": 4777205812953088.0,
186
+ "train_batch_size": 1,
187
+ "trial_name": null,
188
+ "trial_params": null
189
+ }
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c6fce6889d0763e53ec11b925ecf780872259115e0a4de278fecb1730f1c44
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:818822b11989459a5fc595b2555903333d4fbd06ec209d9f8fd739991ecd393f
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c441dfd77124cafa5afe9a4f3e30bd524ebfeb667ce4cb78133a3d18595567b1
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76093b70c93eb8976c70ba3b52470568b0f3d0896ec2dec92615014a4a25ae3e
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e267c53893530bd8784e742b230c103f0af6e3be5e71601f6620f4c0100934
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:225727d72f520f2af545e163e03222702bf08e8084ca44d12dd0337c3ad2899d
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51d18abf0982eaad9817b60cffb3c40f99da4761429ac0c24f6149861969da4c
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e2e06f880660e7b5ffca36b45892d7f2ed89797c6ff687b9dc8da9431886cfa
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09302325581395349,
13
+ "grad_norm": 0.3085244596004486,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1226,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.18604651162790697,
20
+ "grad_norm": 3.019911289215088,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.2883,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.27906976744186046,
27
+ "grad_norm": 1.109711766242981,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.1198,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.37209302325581395,
34
+ "grad_norm": 0.4532826840877533,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0175,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.46511627906976744,
41
+ "grad_norm": 0.8678146600723267,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0323,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.5581395348837209,
48
+ "grad_norm": 2.618272066116333,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.1189,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.6511627906976745,
55
+ "grad_norm": 0.20907337963581085,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0141,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.7441860465116279,
62
+ "grad_norm": 1.467644214630127,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1211,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.8372093023255814,
69
+ "grad_norm": 1.8505585193634033,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0966,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.9302325581395349,
76
+ "grad_norm": 0.5534336566925049,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0249,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 1.0232558139534884,
83
+ "grad_norm": 0.42154374718666077,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0169,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 1.1162790697674418,
90
+ "grad_norm": 0.37012800574302673,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0338,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.2093023255813953,
97
+ "grad_norm": 0.9166113138198853,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0618,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.302325581395349,
104
+ "grad_norm": 0.27272695302963257,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0284,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.3953488372093024,
111
+ "grad_norm": 0.2518395781517029,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.0126,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.4883720930232558,
118
+ "grad_norm": 0.7915253043174744,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0801,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.5813953488372092,
125
+ "grad_norm": 0.902931809425354,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0334,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.6744186046511629,
132
+ "grad_norm": 0.3951447308063507,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0307,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.7674418604651163,
139
+ "grad_norm": 2.1907150745391846,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0596,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.8604651162790697,
146
+ "grad_norm": 1.2620713710784912,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0869,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.9534883720930232,
153
+ "grad_norm": 0.3642316162586212,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0304,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 2.0,
160
+ "step": 43,
161
+ "total_flos": 4812138065428480.0,
162
+ "train_loss": 0.06693689213242641,
163
+ "train_runtime": 216.4073,
164
+ "train_samples_per_second": 0.795,
165
+ "train_steps_per_second": 0.199
166
+ }
167
+ ],
168
+ "logging_steps": 2,
169
+ "max_steps": 43,
170
+ "num_input_tokens_seen": 0,
171
+ "num_train_epochs": 1,
172
+ "save_steps": 500,
173
+ "stateful_callbacks": {
174
+ "TrainerControl": {
175
+ "args": {
176
+ "should_epoch_stop": false,
177
+ "should_evaluate": false,
178
+ "should_log": false,
179
+ "should_save": false,
180
+ "should_training_stop": false
181
+ },
182
+ "attributes": {}
183
+ }
184
+ },
185
+ "total_flos": 4812138065428480.0,
186
+ "train_batch_size": 1,
187
+ "trial_name": null,
188
+ "trial_params": null
189
+ }
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69816f63dda6857999b2188db1a0aaaa9147aab897dcee7573e9d55dbdf1b2cc
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c914d80153a80299ed8993a6ceb64539f8af74f9b6e40638c543863a69ad54f
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ce361c6c22db8c7c4ebfeaae20c30cb50de1598a3657cb2b4d2a0239088930b
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b726647932691999b54f2ab9ec9197c040c249348e7cb4864fddba8cca057773
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63647af6cd32eb3f3541fd71185343a2b826fe5badc33e786d96673a7068f25e
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbae85d3b932e88792d442e91b69a8dd7af46ddbc2f0e332cb4c695c7a9f454b
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc53b17de3dc585196fdcf469a3d94372c45f6eff9d977f4523242386413acb0
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638b6048a232e5fd62e1b30f85d1598fe8d2c095bbb76d1ca75681d6b97b62b3
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09302325581395349,
13
+ "grad_norm": 3.3456127643585205,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2545,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.18604651162790697,
20
+ "grad_norm": 0.6647293567657471,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.143,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.27906976744186046,
27
+ "grad_norm": 2.341078519821167,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.1575,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.37209302325581395,
34
+ "grad_norm": 1.038511037826538,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.1649,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.46511627906976744,
41
+ "grad_norm": 1.1091891527175903,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1359,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.5581395348837209,
48
+ "grad_norm": 0.9437009692192078,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.11,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.6511627906976745,
55
+ "grad_norm": 2.193312168121338,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2129,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.7441860465116279,
62
+ "grad_norm": 1.4751211404800415,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.0952,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.8372093023255814,
69
+ "grad_norm": 0.38253253698349,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0647,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.9302325581395349,
76
+ "grad_norm": 0.6969690322875977,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1394,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 1.0232558139534884,
83
+ "grad_norm": 0.7709684371948242,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1008,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 1.1162790697674418,
90
+ "grad_norm": 2.740997552871704,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.1238,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.2093023255813953,
97
+ "grad_norm": 1.5686851739883423,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.1016,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.302325581395349,
104
+ "grad_norm": 0.3873344659805298,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0211,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.3953488372093024,
111
+ "grad_norm": 0.8399427533149719,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1423,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.4883720930232558,
118
+ "grad_norm": 2.8620800971984863,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2682,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.5813953488372092,
125
+ "grad_norm": 1.715096116065979,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1543,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.6744186046511629,
132
+ "grad_norm": 0.39780232310295105,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0503,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.7674418604651163,
139
+ "grad_norm": 1.9032905101776123,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1529,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.8604651162790697,
146
+ "grad_norm": 1.9974933862686157,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1527,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.9534883720930232,
153
+ "grad_norm": 0.41782402992248535,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.1376,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 2.0,
160
+ "step": 43,
161
+ "total_flos": 4715460834623488.0,
162
+ "train_loss": 0.13745101662569267,
163
+ "train_runtime": 217.324,
164
+ "train_samples_per_second": 0.791,
165
+ "train_steps_per_second": 0.198
166
+ }
167
+ ],
168
+ "logging_steps": 2,
169
+ "max_steps": 43,
170
+ "num_input_tokens_seen": 0,
171
+ "num_train_epochs": 1,
172
+ "save_steps": 500,
173
+ "stateful_callbacks": {
174
+ "TrainerControl": {
175
+ "args": {
176
+ "should_epoch_stop": false,
177
+ "should_evaluate": false,
178
+ "should_log": false,
179
+ "should_save": false,
180
+ "should_training_stop": false
181
+ },
182
+ "attributes": {}
183
+ }
184
+ },
185
+ "total_flos": 4715460834623488.0,
186
+ "train_batch_size": 1,
187
+ "trial_name": null,
188
+ "trial_params": null
189
+ }
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f0a3b90e19eabbfc0e5635fe7059549c618b6e036f9f45a4372451a8a36e5e
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4567d214400f00e8531f2289e87b461d7e2ff36d3599953485e74c95a50ae33
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36f52ffe528820e313e8399cc62c04cbc2bf7042f11dc9809454f8d9ac8e6419
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95607ac19caf68871278ba85509065e63ada58d495744bd25ca2ae6c2627a16d
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5921fb1eef7a3d375c934d78dd90029f1c52ed7f25c757f29f83bb17cced3e9d
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1eefd262a4d3ee7c54828027c97f4b11f4186d76647bcae6b7ff7378e1a742
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc91a80a0564d2d9cc2000161fd3302ed3cd3d82edead6b1e60e8270032e1a3
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ccdf785607bf48d74b05613167b1393b6d33de3717fd8db901693e4439448fa
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09302325581395349,
13
+ "grad_norm": 1.1141589879989624,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.1236,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.18604651162790697,
20
+ "grad_norm": 1.2319393157958984,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.3126,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.27906976744186046,
27
+ "grad_norm": 2.423560857772827,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.4022,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.37209302325581395,
34
+ "grad_norm": 2.308655023574829,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2367,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.46511627906976744,
41
+ "grad_norm": 1.6019493341445923,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.1691,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.5581395348837209,
48
+ "grad_norm": 2.4790854454040527,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.2446,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.6511627906976745,
55
+ "grad_norm": 1.2402669191360474,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.1289,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.7441860465116279,
62
+ "grad_norm": 4.58521842956543,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1723,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.8372093023255814,
69
+ "grad_norm": 1.7242616415023804,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.1184,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.9302325581395349,
76
+ "grad_norm": 2.655269145965576,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.244,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 1.0232558139534884,
83
+ "grad_norm": 2.253636121749878,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.2518,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 1.1162790697674418,
90
+ "grad_norm": 3.0500082969665527,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2864,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.2093023255813953,
97
+ "grad_norm": 2.2204771041870117,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.31,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.302325581395349,
104
+ "grad_norm": 2.925663948059082,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.5858,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.3953488372093024,
111
+ "grad_norm": 1.716472864151001,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.2467,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.4883720930232558,
118
+ "grad_norm": 2.464635133743286,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2649,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.5813953488372092,
125
+ "grad_norm": 0.828945517539978,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0731,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.6744186046511629,
132
+ "grad_norm": 1.1858415603637695,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.111,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.7674418604651163,
139
+ "grad_norm": 3.732938766479492,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2349,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.8604651162790697,
146
+ "grad_norm": 2.46227765083313,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3129,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.9534883720930232,
153
+ "grad_norm": 1.6540902853012085,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2932,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 2.0,
160
+ "step": 43,
161
+ "total_flos": 4880262747914240.0,
162
+ "train_loss": 0.25222200571104536,
163
+ "train_runtime": 215.4858,
164
+ "train_samples_per_second": 0.798,
165
+ "train_steps_per_second": 0.2
166
+ }
167
+ ],
168
+ "logging_steps": 2,
169
+ "max_steps": 43,
170
+ "num_input_tokens_seen": 0,
171
+ "num_train_epochs": 1,
172
+ "save_steps": 500,
173
+ "stateful_callbacks": {
174
+ "TrainerControl": {
175
+ "args": {
176
+ "should_epoch_stop": false,
177
+ "should_evaluate": false,
178
+ "should_log": false,
179
+ "should_save": false,
180
+ "should_training_stop": false
181
+ },
182
+ "attributes": {}
183
+ }
184
+ },
185
+ "total_flos": 4880262747914240.0,
186
+ "train_batch_size": 1,
187
+ "trial_name": null,
188
+ "trial_params": null
189
+ }
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676d1a1cc1b175d0235d62fc017e315c318119b6f9d2b694a759bf9012621fa2
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4244a475c759d82fa5ffbcbfc2deb96f9ee806361d03aa645aebf32bc2d0d139
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae5a9f63d9f5f4fd3301f8e4874b04b5995dadb0557e81e711abcd08c7b252f6
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe972cdc2d4d4445927a1f957a4a63e44b72e60301b94c3115794d59381c74a8
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3239853222047f1365deb3434639266349a53ca0b9050cb2fde4032b546dc38e
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe8e87567dbde70e61d4b3893fe944c4cfa319c4867e6672286ef519491e3ee
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64cf784b6a5fa4dcec46cd536c3ea92812bcec7bee6eeb3a6b752d0d9a8adf67
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04dc540d0b92cd9e8c8b92930521667e6c090ca32a551758b702ae8e34e21395
3
+ size 1167511866
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09302325581395349,
13
+ "grad_norm": 2.0882959365844727,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3258,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.18604651162790697,
20
+ "grad_norm": 1.28811776638031,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.1932,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.27906976744186046,
27
+ "grad_norm": 0.822692334651947,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0951,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.37209302325581395,
34
+ "grad_norm": 1.5997800827026367,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2066,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.46511627906976744,
41
+ "grad_norm": 2.9270811080932617,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2142,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.5581395348837209,
48
+ "grad_norm": 0.9111597537994385,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0939,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.6511627906976745,
55
+ "grad_norm": 1.769669771194458,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.1228,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.7441860465116279,
62
+ "grad_norm": 1.8244539499282837,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1983,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.8372093023255814,
69
+ "grad_norm": 1.7385451793670654,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2394,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.9302325581395349,
76
+ "grad_norm": 2.4531519412994385,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1592,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 1.0232558139534884,
83
+ "grad_norm": 0.8544302582740784,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1533,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 1.1162790697674418,
90
+ "grad_norm": 2.170783519744873,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.3049,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 1.2093023255813953,
97
+ "grad_norm": 1.9732646942138672,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3371,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 1.302325581395349,
104
+ "grad_norm": 3.642188549041748,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.3442,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 1.3953488372093024,
111
+ "grad_norm": 1.494310975074768,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.13,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 1.4883720930232558,
118
+ "grad_norm": 0.3682941496372223,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0284,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 1.5813953488372092,
125
+ "grad_norm": 1.2264622449874878,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.1392,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 1.6744186046511629,
132
+ "grad_norm": 0.5091031193733215,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.1509,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 1.7674418604651163,
139
+ "grad_norm": 1.4979541301727295,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1525,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 1.8604651162790697,
146
+ "grad_norm": 1.4245574474334717,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.2237,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 1.9534883720930232,
153
+ "grad_norm": 1.7625281810760498,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.2382,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 2.0,
160
+ "step": 43,
161
+ "total_flos": 5466572382535680.0,
162
+ "train_loss": 0.19268665757290152,
163
+ "train_runtime": 218.8116,
164
+ "train_samples_per_second": 0.786,
165
+ "train_steps_per_second": 0.197
166
+ }
167
+ ],
168
+ "logging_steps": 2,
169
+ "max_steps": 43,
170
+ "num_input_tokens_seen": 0,
171
+ "num_train_epochs": 1,
172
+ "save_steps": 500,
173
+ "stateful_callbacks": {
174
+ "TrainerControl": {
175
+ "args": {
176
+ "should_epoch_stop": false,
177
+ "should_evaluate": false,
178
+ "should_log": false,
179
+ "should_save": false,
180
+ "should_training_stop": false
181
+ },
182
+ "attributes": {}
183
+ }
184
+ },
185
+ "total_flos": 5466572382535680.0,
186
+ "train_batch_size": 1,
187
+ "trial_name": null,
188
+ "trial_params": null
189
+ }
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88808d32046e53379114e941c371bd75493bde5bb1542d9334164b318c408b0a
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50aa5dce46ef29bd53549ca4bb111aa001b4b81fcbdcd42593a2e89c24d1e609
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1d29c2144b0318477b6e17f2c8a95e0b1b6823db4c651660c3937d120884472
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b310990f72f64fe576d22dcaac6e3f75e347ddddd2ac489381cbd61f1b47a75
3
+ size 1167513110
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a072c7cecc25d135822bb331490b5401228a3292ef9795ef78ae35d285a7e882
3
+ size 1167511866