Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json +189 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f06a89dfcac5961ac5bdb293a2a3f574fcb9216e4593500ab6c2155ff3df0313
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fe982dd9d5a3ae03280e3447d8063dcc43629d7dca6b8e90b03907c76b90a58
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aedba7096d5b5406fd46033b0b93b844abbe3d7454189a2035107d0dbb00caaa
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0526023fc6ccb898eb90c77e6bdc99b569624d6470ad5b920a20e689c289ad2b
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a263e7e7ee03bfcf87feac35953e8aa2fa2ce0c0e3c56c5dffbcf6c8a7d52721
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8e10816fae9b78236f55f93f29814d10a19539fd654a0c3e9737cbd47cd8d9d
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f4335cc459cd1552cda05f02b928ed4417cd0d799ff11e7ef4d08a16e0b8484
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c206007eb99dbc9905215c8f5443524bbcf2827728a8be2f97d0edee647ca575
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 43,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09302325581395349,
|
13 |
+
"grad_norm": 2.9576117992401123,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.2411,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.18604651162790697,
|
20 |
+
"grad_norm": 1.2031710147857666,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0578,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.27906976744186046,
|
27 |
+
"grad_norm": 2.9653682708740234,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3511,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.37209302325581395,
|
34 |
+
"grad_norm": 4.665686130523682,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1305,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.46511627906976744,
|
41 |
+
"grad_norm": 2.068437337875366,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1576,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5581395348837209,
|
48 |
+
"grad_norm": 0.7609916925430298,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1547,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.6511627906976745,
|
55 |
+
"grad_norm": 0.8752633333206177,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2439,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.7441860465116279,
|
62 |
+
"grad_norm": 0.7228463292121887,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0869,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.8372093023255814,
|
69 |
+
"grad_norm": 0.5220616459846497,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1898,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.9302325581395349,
|
76 |
+
"grad_norm": 0.5391299724578857,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0884,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.0232558139534884,
|
83 |
+
"grad_norm": 0.4390765428543091,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.038,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.1162790697674418,
|
90 |
+
"grad_norm": 4.568640232086182,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2474,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.2093023255813953,
|
97 |
+
"grad_norm": 4.5196356773376465,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3904,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.302325581395349,
|
104 |
+
"grad_norm": 0.23186014592647552,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.101,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.3953488372093024,
|
111 |
+
"grad_norm": 2.5624356269836426,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1768,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.4883720930232558,
|
118 |
+
"grad_norm": 0.4649677872657776,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0719,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.5813953488372092,
|
125 |
+
"grad_norm": 0.7176182270050049,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1396,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6744186046511629,
|
132 |
+
"grad_norm": 2.2132463455200195,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.087,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.7674418604651163,
|
139 |
+
"grad_norm": 1.706496238708496,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2997,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.8604651162790697,
|
146 |
+
"grad_norm": 0.48359882831573486,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0237,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.9534883720930232,
|
153 |
+
"grad_norm": 0.40984606742858887,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0393,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 2.0,
|
160 |
+
"step": 43,
|
161 |
+
"total_flos": 4777205812953088.0,
|
162 |
+
"train_loss": 0.15616962521575217,
|
163 |
+
"train_runtime": 217.1153,
|
164 |
+
"train_samples_per_second": 0.792,
|
165 |
+
"train_steps_per_second": 0.198
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"logging_steps": 2,
|
169 |
+
"max_steps": 43,
|
170 |
+
"num_input_tokens_seen": 0,
|
171 |
+
"num_train_epochs": 1,
|
172 |
+
"save_steps": 500,
|
173 |
+
"stateful_callbacks": {
|
174 |
+
"TrainerControl": {
|
175 |
+
"args": {
|
176 |
+
"should_epoch_stop": false,
|
177 |
+
"should_evaluate": false,
|
178 |
+
"should_log": false,
|
179 |
+
"should_save": false,
|
180 |
+
"should_training_stop": false
|
181 |
+
},
|
182 |
+
"attributes": {}
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"total_flos": 4777205812953088.0,
|
186 |
+
"train_batch_size": 1,
|
187 |
+
"trial_name": null,
|
188 |
+
"trial_params": null
|
189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71c6fce6889d0763e53ec11b925ecf780872259115e0a4de278fecb1730f1c44
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:818822b11989459a5fc595b2555903333d4fbd06ec209d9f8fd739991ecd393f
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c441dfd77124cafa5afe9a4f3e30bd524ebfeb667ce4cb78133a3d18595567b1
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76093b70c93eb8976c70ba3b52470568b0f3d0896ec2dec92615014a4a25ae3e
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27e267c53893530bd8784e742b230c103f0af6e3be5e71601f6620f4c0100934
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:225727d72f520f2af545e163e03222702bf08e8084ca44d12dd0337c3ad2899d
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51d18abf0982eaad9817b60cffb3c40f99da4761429ac0c24f6149861969da4c
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e2e06f880660e7b5ffca36b45892d7f2ed89797c6ff687b9dc8da9431886cfa
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 43,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09302325581395349,
|
13 |
+
"grad_norm": 0.3085244596004486,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1226,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.18604651162790697,
|
20 |
+
"grad_norm": 3.019911289215088,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2883,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.27906976744186046,
|
27 |
+
"grad_norm": 1.109711766242981,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1198,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.37209302325581395,
|
34 |
+
"grad_norm": 0.4532826840877533,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0175,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.46511627906976744,
|
41 |
+
"grad_norm": 0.8678146600723267,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0323,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5581395348837209,
|
48 |
+
"grad_norm": 2.618272066116333,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1189,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.6511627906976745,
|
55 |
+
"grad_norm": 0.20907337963581085,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0141,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.7441860465116279,
|
62 |
+
"grad_norm": 1.467644214630127,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1211,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.8372093023255814,
|
69 |
+
"grad_norm": 1.8505585193634033,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0966,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.9302325581395349,
|
76 |
+
"grad_norm": 0.5534336566925049,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0249,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.0232558139534884,
|
83 |
+
"grad_norm": 0.42154374718666077,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0169,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.1162790697674418,
|
90 |
+
"grad_norm": 0.37012800574302673,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0338,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.2093023255813953,
|
97 |
+
"grad_norm": 0.9166113138198853,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0618,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.302325581395349,
|
104 |
+
"grad_norm": 0.27272695302963257,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0284,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.3953488372093024,
|
111 |
+
"grad_norm": 0.2518395781517029,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0126,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.4883720930232558,
|
118 |
+
"grad_norm": 0.7915253043174744,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0801,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.5813953488372092,
|
125 |
+
"grad_norm": 0.902931809425354,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0334,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6744186046511629,
|
132 |
+
"grad_norm": 0.3951447308063507,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0307,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.7674418604651163,
|
139 |
+
"grad_norm": 2.1907150745391846,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0596,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.8604651162790697,
|
146 |
+
"grad_norm": 1.2620713710784912,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0869,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.9534883720930232,
|
153 |
+
"grad_norm": 0.3642316162586212,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0304,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 2.0,
|
160 |
+
"step": 43,
|
161 |
+
"total_flos": 4812138065428480.0,
|
162 |
+
"train_loss": 0.06693689213242641,
|
163 |
+
"train_runtime": 216.4073,
|
164 |
+
"train_samples_per_second": 0.795,
|
165 |
+
"train_steps_per_second": 0.199
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"logging_steps": 2,
|
169 |
+
"max_steps": 43,
|
170 |
+
"num_input_tokens_seen": 0,
|
171 |
+
"num_train_epochs": 1,
|
172 |
+
"save_steps": 500,
|
173 |
+
"stateful_callbacks": {
|
174 |
+
"TrainerControl": {
|
175 |
+
"args": {
|
176 |
+
"should_epoch_stop": false,
|
177 |
+
"should_evaluate": false,
|
178 |
+
"should_log": false,
|
179 |
+
"should_save": false,
|
180 |
+
"should_training_stop": false
|
181 |
+
},
|
182 |
+
"attributes": {}
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"total_flos": 4812138065428480.0,
|
186 |
+
"train_batch_size": 1,
|
187 |
+
"trial_name": null,
|
188 |
+
"trial_params": null
|
189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69816f63dda6857999b2188db1a0aaaa9147aab897dcee7573e9d55dbdf1b2cc
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c914d80153a80299ed8993a6ceb64539f8af74f9b6e40638c543863a69ad54f
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ce361c6c22db8c7c4ebfeaae20c30cb50de1598a3657cb2b4d2a0239088930b
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b726647932691999b54f2ab9ec9197c040c249348e7cb4864fddba8cca057773
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63647af6cd32eb3f3541fd71185343a2b826fe5badc33e786d96673a7068f25e
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbae85d3b932e88792d442e91b69a8dd7af46ddbc2f0e332cb4c695c7a9f454b
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc53b17de3dc585196fdcf469a3d94372c45f6eff9d977f4523242386413acb0
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:638b6048a232e5fd62e1b30f85d1598fe8d2c095bbb76d1ca75681d6b97b62b3
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 43,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09302325581395349,
|
13 |
+
"grad_norm": 3.3456127643585205,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.2545,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.18604651162790697,
|
20 |
+
"grad_norm": 0.6647293567657471,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.143,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.27906976744186046,
|
27 |
+
"grad_norm": 2.341078519821167,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1575,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.37209302325581395,
|
34 |
+
"grad_norm": 1.038511037826538,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1649,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.46511627906976744,
|
41 |
+
"grad_norm": 1.1091891527175903,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1359,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5581395348837209,
|
48 |
+
"grad_norm": 0.9437009692192078,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.11,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.6511627906976745,
|
55 |
+
"grad_norm": 2.193312168121338,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2129,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.7441860465116279,
|
62 |
+
"grad_norm": 1.4751211404800415,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0952,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.8372093023255814,
|
69 |
+
"grad_norm": 0.38253253698349,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0647,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.9302325581395349,
|
76 |
+
"grad_norm": 0.6969690322875977,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1394,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.0232558139534884,
|
83 |
+
"grad_norm": 0.7709684371948242,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1008,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.1162790697674418,
|
90 |
+
"grad_norm": 2.740997552871704,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.1238,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.2093023255813953,
|
97 |
+
"grad_norm": 1.5686851739883423,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.1016,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.302325581395349,
|
104 |
+
"grad_norm": 0.3873344659805298,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0211,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.3953488372093024,
|
111 |
+
"grad_norm": 0.8399427533149719,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1423,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.4883720930232558,
|
118 |
+
"grad_norm": 2.8620800971984863,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2682,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.5813953488372092,
|
125 |
+
"grad_norm": 1.715096116065979,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1543,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6744186046511629,
|
132 |
+
"grad_norm": 0.39780232310295105,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0503,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.7674418604651163,
|
139 |
+
"grad_norm": 1.9032905101776123,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1529,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.8604651162790697,
|
146 |
+
"grad_norm": 1.9974933862686157,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1527,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.9534883720930232,
|
153 |
+
"grad_norm": 0.41782402992248535,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.1376,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 2.0,
|
160 |
+
"step": 43,
|
161 |
+
"total_flos": 4715460834623488.0,
|
162 |
+
"train_loss": 0.13745101662569267,
|
163 |
+
"train_runtime": 217.324,
|
164 |
+
"train_samples_per_second": 0.791,
|
165 |
+
"train_steps_per_second": 0.198
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"logging_steps": 2,
|
169 |
+
"max_steps": 43,
|
170 |
+
"num_input_tokens_seen": 0,
|
171 |
+
"num_train_epochs": 1,
|
172 |
+
"save_steps": 500,
|
173 |
+
"stateful_callbacks": {
|
174 |
+
"TrainerControl": {
|
175 |
+
"args": {
|
176 |
+
"should_epoch_stop": false,
|
177 |
+
"should_evaluate": false,
|
178 |
+
"should_log": false,
|
179 |
+
"should_save": false,
|
180 |
+
"should_training_stop": false
|
181 |
+
},
|
182 |
+
"attributes": {}
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"total_flos": 4715460834623488.0,
|
186 |
+
"train_batch_size": 1,
|
187 |
+
"trial_name": null,
|
188 |
+
"trial_params": null
|
189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9f0a3b90e19eabbfc0e5635fe7059549c618b6e036f9f45a4372451a8a36e5e
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4567d214400f00e8531f2289e87b461d7e2ff36d3599953485e74c95a50ae33
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36f52ffe528820e313e8399cc62c04cbc2bf7042f11dc9809454f8d9ac8e6419
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95607ac19caf68871278ba85509065e63ada58d495744bd25ca2ae6c2627a16d
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5921fb1eef7a3d375c934d78dd90029f1c52ed7f25c757f29f83bb17cced3e9d
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b1eefd262a4d3ee7c54828027c97f4b11f4186d76647bcae6b7ff7378e1a742
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fc91a80a0564d2d9cc2000161fd3302ed3cd3d82edead6b1e60e8270032e1a3
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ccdf785607bf48d74b05613167b1393b6d33de3717fd8db901693e4439448fa
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 43,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09302325581395349,
|
13 |
+
"grad_norm": 1.1141589879989624,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1236,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.18604651162790697,
|
20 |
+
"grad_norm": 1.2319393157958984,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.3126,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.27906976744186046,
|
27 |
+
"grad_norm": 2.423560857772827,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4022,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.37209302325581395,
|
34 |
+
"grad_norm": 2.308655023574829,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.2367,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.46511627906976744,
|
41 |
+
"grad_norm": 1.6019493341445923,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1691,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5581395348837209,
|
48 |
+
"grad_norm": 2.4790854454040527,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2446,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.6511627906976745,
|
55 |
+
"grad_norm": 1.2402669191360474,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1289,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.7441860465116279,
|
62 |
+
"grad_norm": 4.58521842956543,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1723,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.8372093023255814,
|
69 |
+
"grad_norm": 1.7242616415023804,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1184,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.9302325581395349,
|
76 |
+
"grad_norm": 2.655269145965576,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.244,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.0232558139534884,
|
83 |
+
"grad_norm": 2.253636121749878,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2518,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.1162790697674418,
|
90 |
+
"grad_norm": 3.0500082969665527,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2864,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.2093023255813953,
|
97 |
+
"grad_norm": 2.2204771041870117,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.31,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.302325581395349,
|
104 |
+
"grad_norm": 2.925663948059082,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.5858,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.3953488372093024,
|
111 |
+
"grad_norm": 1.716472864151001,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.2467,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.4883720930232558,
|
118 |
+
"grad_norm": 2.464635133743286,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2649,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.5813953488372092,
|
125 |
+
"grad_norm": 0.828945517539978,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0731,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6744186046511629,
|
132 |
+
"grad_norm": 1.1858415603637695,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.111,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.7674418604651163,
|
139 |
+
"grad_norm": 3.732938766479492,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2349,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.8604651162790697,
|
146 |
+
"grad_norm": 2.46227765083313,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3129,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.9534883720930232,
|
153 |
+
"grad_norm": 1.6540902853012085,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2932,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 2.0,
|
160 |
+
"step": 43,
|
161 |
+
"total_flos": 4880262747914240.0,
|
162 |
+
"train_loss": 0.25222200571104536,
|
163 |
+
"train_runtime": 215.4858,
|
164 |
+
"train_samples_per_second": 0.798,
|
165 |
+
"train_steps_per_second": 0.2
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"logging_steps": 2,
|
169 |
+
"max_steps": 43,
|
170 |
+
"num_input_tokens_seen": 0,
|
171 |
+
"num_train_epochs": 1,
|
172 |
+
"save_steps": 500,
|
173 |
+
"stateful_callbacks": {
|
174 |
+
"TrainerControl": {
|
175 |
+
"args": {
|
176 |
+
"should_epoch_stop": false,
|
177 |
+
"should_evaluate": false,
|
178 |
+
"should_log": false,
|
179 |
+
"should_save": false,
|
180 |
+
"should_training_stop": false
|
181 |
+
},
|
182 |
+
"attributes": {}
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"total_flos": 4880262747914240.0,
|
186 |
+
"train_batch_size": 1,
|
187 |
+
"trial_name": null,
|
188 |
+
"trial_params": null
|
189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:676d1a1cc1b175d0235d62fc017e315c318119b6f9d2b694a759bf9012621fa2
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4244a475c759d82fa5ffbcbfc2deb96f9ee806361d03aa645aebf32bc2d0d139
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae5a9f63d9f5f4fd3301f8e4874b04b5995dadb0557e81e711abcd08c7b252f6
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe972cdc2d4d4445927a1f957a4a63e44b72e60301b94c3115794d59381c74a8
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3239853222047f1365deb3434639266349a53ca0b9050cb2fde4032b546dc38e
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fe8e87567dbde70e61d4b3893fe944c4cfa319c4867e6672286ef519491e3ee
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64cf784b6a5fa4dcec46cd536c3ea92812bcec7bee6eeb3a6b752d0d9a8adf67
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04dc540d0b92cd9e8c8b92930521667e6c090ca32a551758b702ae8e34e21395
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 43,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09302325581395349,
|
13 |
+
"grad_norm": 2.0882959365844727,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3258,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.18604651162790697,
|
20 |
+
"grad_norm": 1.28811776638031,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.1932,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.27906976744186046,
|
27 |
+
"grad_norm": 0.822692334651947,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.0951,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.37209302325581395,
|
34 |
+
"grad_norm": 1.5997800827026367,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.2066,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.46511627906976744,
|
41 |
+
"grad_norm": 2.9270811080932617,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.2142,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.5581395348837209,
|
48 |
+
"grad_norm": 0.9111597537994385,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0939,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.6511627906976745,
|
55 |
+
"grad_norm": 1.769669771194458,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1228,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.7441860465116279,
|
62 |
+
"grad_norm": 1.8244539499282837,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1983,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.8372093023255814,
|
69 |
+
"grad_norm": 1.7385451793670654,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2394,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.9302325581395349,
|
76 |
+
"grad_norm": 2.4531519412994385,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1592,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.0232558139534884,
|
83 |
+
"grad_norm": 0.8544302582740784,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1533,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.1162790697674418,
|
90 |
+
"grad_norm": 2.170783519744873,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.3049,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.2093023255813953,
|
97 |
+
"grad_norm": 1.9732646942138672,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3371,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.302325581395349,
|
104 |
+
"grad_norm": 3.642188549041748,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3442,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.3953488372093024,
|
111 |
+
"grad_norm": 1.494310975074768,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.13,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.4883720930232558,
|
118 |
+
"grad_norm": 0.3682941496372223,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0284,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.5813953488372092,
|
125 |
+
"grad_norm": 1.2264622449874878,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1392,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6744186046511629,
|
132 |
+
"grad_norm": 0.5091031193733215,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.1509,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.7674418604651163,
|
139 |
+
"grad_norm": 1.4979541301727295,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1525,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.8604651162790697,
|
146 |
+
"grad_norm": 1.4245574474334717,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.2237,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.9534883720930232,
|
153 |
+
"grad_norm": 1.7625281810760498,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2382,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 2.0,
|
160 |
+
"step": 43,
|
161 |
+
"total_flos": 5466572382535680.0,
|
162 |
+
"train_loss": 0.19268665757290152,
|
163 |
+
"train_runtime": 218.8116,
|
164 |
+
"train_samples_per_second": 0.786,
|
165 |
+
"train_steps_per_second": 0.197
|
166 |
+
}
|
167 |
+
],
|
168 |
+
"logging_steps": 2,
|
169 |
+
"max_steps": 43,
|
170 |
+
"num_input_tokens_seen": 0,
|
171 |
+
"num_train_epochs": 1,
|
172 |
+
"save_steps": 500,
|
173 |
+
"stateful_callbacks": {
|
174 |
+
"TrainerControl": {
|
175 |
+
"args": {
|
176 |
+
"should_epoch_stop": false,
|
177 |
+
"should_evaluate": false,
|
178 |
+
"should_log": false,
|
179 |
+
"should_save": false,
|
180 |
+
"should_training_stop": false
|
181 |
+
},
|
182 |
+
"attributes": {}
|
183 |
+
}
|
184 |
+
},
|
185 |
+
"total_flos": 5466572382535680.0,
|
186 |
+
"train_batch_size": 1,
|
187 |
+
"trial_name": null,
|
188 |
+
"trial_params": null
|
189 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88808d32046e53379114e941c371bd75493bde5bb1542d9334164b318c408b0a
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50aa5dce46ef29bd53549ca4bb111aa001b4b81fcbdcd42593a2e89c24d1e609
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1d29c2144b0318477b6e17f2c8a95e0b1b6823db4c651660c3937d120884472
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b310990f72f64fe576d22dcaac6e3f75e347ddddd2ac489381cbd61f1b47a75
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr43_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a072c7cecc25d135822bb331490b5401228a3292ef9795ef78ae35d285a7e882
|
3 |
+
size 1167511866
|