Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fe1dbc65f417209d09344d4e4995df5d78c5eef26666b716f886d90d26de53b
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0166898c5d59908d3d4e636b1e8705ab438946e36b7f214e4c120674652f0647
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:748f75b79de639a2a4af53a70cce66a8ca2082d031f11a4e62a32187924c4242
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf98a0fa6bcb6b3f4639a9d3ce8d0d4bc9f306fc5c94fe909c335ed5961f3391
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d679df9ca797825f3a82c1c09077f75eeace1c8de08ab30c5fa922069db709e1
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b197e79ebb6d8ae64ec6739b5cbf3cb2c42e1e175ddd75ee657f36a5e8f23cd
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efc8ee78f3b179428383f77c08c7e1005b657efa43a78e8a1acbfab15a6f4244
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0b0c72c61440e9ae802fcad9652d631f5f56aa289e047e253f7ee81d8019dfe
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 0.7212343215942383,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0439,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 11.383496284484863,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5268,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.927216112613678,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.337,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 3.203252077102661,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3553,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.08366145938634872,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0917,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 6.82559061050415,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6837,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 1.8658998012542725,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0646,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 1.4391404390335083,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0581,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 1.4982571601867676,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.3933,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 1.6830452680587769,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1075,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.245327427983284,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0312,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 10.72057056427002,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.38,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 1.239396095275879,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0602,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.4094916880130768,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0428,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 1.428717851638794,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6015,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.11368861049413681,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.1259,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 1.6821730136871338,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1679,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 7.368831157684326,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.3051,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 4.640942096710205,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1196,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.3502234220504761,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1621,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 2.8279569149017334,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.4888,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.6382105946540833,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1357,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 0.40106961131095886,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.3606,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 5.146971702575684,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1506,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.3846442997455597,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0316,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5266647493378048.0,
|
190 |
+
"train_loss": 0.23302085906267167,
|
191 |
+
"train_runtime": 198.109,
|
192 |
+
"train_samples_per_second": 1.01,
|
193 |
+
"train_steps_per_second": 0.252
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5266647493378048.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c2aec2a71a2298864fb9cb71bbe157edfe31cb48c8758ed137adf555b57706c
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90b0271eee7daa4e6cf36df93827f4dd84b51c02fbc7eeb354a4584bc9eecc1c
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c319587b2f08331b839f2419acb0e4bfacc9c57991286ab2a77ea96d9feb5cbc
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9bb6d41e065f1080f20435a320c1edfe1f3c8cba9f031e54d7610b9e367f8a9
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f72227369a7df43e68a0bb4081030172e220490ef080b56abc68a7bdce46ea6
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a435b97115a2feb785df092f369b70fa82f9fb72cf73f47bd559d40fa9b5a2a
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0390a2236b53f82449b5243e68ffc4c185e44bc9517abac645cbedc5678052c9
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecfbee4594e86c27822bb6a0d822dc075ba4815540812516b3e99f0069448cce
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 0.2633078396320343,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0962,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 18.888702392578125,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2108,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 5.582844257354736,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.401,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 0.3722727596759796,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0165,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.6710987687110901,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0204,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 0.009970537386834621,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0149,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 0.994692862033844,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0285,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.025523852556943893,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.015,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 0.12230795621871948,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1153,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 0.009217753075063229,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0512,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 2.2184362411499023,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1586,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 7.529627323150635,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.164,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 1.4156068563461304,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.1002,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.2617127001285553,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0166,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 0.015605290420353413,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0145,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.057093504816293716,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.015,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 0.13609035313129425,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0151,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 0.03114943951368332,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.1649,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 0.026129912585020065,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0145,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.42378631234169006,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0229,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 1.1538219451904297,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0286,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 1.1634211540222168,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0363,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 0.014446967281401157,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0142,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 0.0895637720823288,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0152,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.36212360858917236,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0169,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5303430465716224.0,
|
190 |
+
"train_loss": 0.07068853974342346,
|
191 |
+
"train_runtime": 198.2611,
|
192 |
+
"train_samples_per_second": 1.009,
|
193 |
+
"train_steps_per_second": 0.252
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5303430465716224.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc38d3fec8aaf304a73dc0465221009621d12e35fe11437da6cdd8c90b9648e8
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4af31ee51a64d1dc941abff726f2d86b9aed76733dbd2124a460acb8f7fc752
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6760ac39e030ae9fa6ebc136bdc504c2239d72745f5b973ed81a1e0decdfc048
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e5eebbd87377d275d69626cccc34474e06e8611ca75b57099b23a759cb721b7
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1198bde519b9e1569c1f5d34b7cd75ade0f1ccdede78bf1e1d666c2185198bf0
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0db7977fb93fec26d6d524d6bb78a926367418764df5ead8d7437c724c9d30d9
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8e377ed9ce274f12fa63914553e2b4dd7b85b45fcaba55bde1fefdb0aee6e0f
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:154985bd935d4f05b0381b48c6606a5ef06bdc0053cdc7537f2dc22e5dc37771
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 3.617075204849243,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1032,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.15696358680725098,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.1814,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 5.014780521392822,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.761,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 9.337811470031738,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.8625,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 6.394294261932373,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.6403,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 1.694501519203186,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2702,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 27.820241928100586,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3551,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 2.758286476135254,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4164,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 1.9265002012252808,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2844,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 3.5484347343444824,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3019,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 2.66292405128479,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2797,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 3.7674098014831543,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.404,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 7.080770969390869,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.4917,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 3.9330906867980957,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3915,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.2564377784729004,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.242,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 1.0977956056594849,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0854,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 4.018260478973389,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5377,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 9.991900444030762,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5558,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 1.1954278945922852,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.6506,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.9688401818275452,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1332,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 5.815992832183838,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.3631,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 2.7181556224823,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2599,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 4.442654132843018,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.191,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 7.31368350982666,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.4697,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 7.1462225914001465,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3045,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5195491428007936.0,
|
190 |
+
"train_loss": 0.42144030570983887,
|
191 |
+
"train_runtime": 199.465,
|
192 |
+
"train_samples_per_second": 1.003,
|
193 |
+
"train_steps_per_second": 0.251
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5195491428007936.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7fc4f7efb25926bfa826673aa01335f815c127661a07f3cbb64a95c8fdffb40
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48b399960eea88ae61a0a1aa3dfd3c1813abcdad91c1b4224ea8933067755a06
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89943c600b2b161ec6014448b900c8adf846847c4b9f68ffbf21de31f5e5d32c
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5699250baf94f3019eaeb1ef41db0f18f6dfd3e0c3bd0a112b411bd4170e0eb1
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:472fbc1f43c917af0bb1f5e94c2286225b782951b640924fbaa37c723473a9ad
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c0e5b108c6b221edf19db70fe14233bf9b9b7422803f13e1c304e11f6e5911a
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c149fa6c092ba05bb37700e485f6699abc82de896528610d1635a195fc9c7a0
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a59d5ce425a0645c183f6974a7833ce97c3e20569b720d8dec7176fff0fd0186
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 8.167108535766602,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.7666,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.970125138759613,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.4831,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 6.168637275695801,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3263,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 4.275420188903809,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4419,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 7.258407115936279,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.7066,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 5.875009059906006,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.8123,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 3.180713653564453,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2229,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 1.822489619255066,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.245,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 8.324261665344238,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6389,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 4.086201190948486,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1874,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 4.469182968139648,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.6376,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 9.68209171295166,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.4976,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 15.532666206359863,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.6447,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 8.273395538330078,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3491,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 7.468406677246094,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6633,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 8.63184642791748,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.8022,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 12.6749267578125,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.7509,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 4.348174571990967,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.2364,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 4.658010005950928,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.5582,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 3.064706802368164,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.7061,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 15.28968334197998,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.1366,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 4.18186092376709,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.3874,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 2.199023723602295,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4409,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 7.054344177246094,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.9599,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 2.990053653717041,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3781,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5376319969820672.0,
|
190 |
+
"train_loss": 0.599202663898468,
|
191 |
+
"train_runtime": 198.5307,
|
192 |
+
"train_samples_per_second": 1.007,
|
193 |
+
"train_steps_per_second": 0.252
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5376319969820672.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9eb75c8782194720b59887465c995e11d72f78c8aca656c42c9b469a59124a9
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b39a4efe80d568e273b5a623ab5c108b7e27d937316250436a587da562ac3d86
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09f0af7ab102fbed3a844c53a4f7dcc5f21677451244983cab0caf1c747d76f5
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18987b20b7bbc6a88d60b2b91648ad3a08b577918fa565e2278a750aee69cc6d
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bd496e40f9341b403fe665f4a3f2261ae72e39294e7a4678261659e562606ce
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8d9e327003fd39d6bdcc0eabd84abc83f519cb69d6abde49f8d1544c15881a6
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d90208da0627322e15237fe2b0bc8fccf40d8472da50b806d3941b4128ce6b8
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06dc84e7ab1330bf113bc7de877ec9cedc283cb04d0aa6de93816d56ad6d6cbe
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 4.40165376663208,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3787,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 3.2695400714874268,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.6385,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 2.2764341831207275,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3251,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 2.679776191711426,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3256,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 2.267791748046875,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.2853,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 3.7516863346099854,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3355,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 4.205787181854248,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3998,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 6.004105567932129,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.371,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 4.294463157653809,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4596,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 3.1517653465270996,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.403,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 3.2551352977752686,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.3488,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 2.0919487476348877,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5908,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 2.9000701904296875,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.4376,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.4982464909553528,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.2203,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.5159378051757812,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.4652,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 1.462558627128601,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2046,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 3.924109935760498,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.8077,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 3.89208722114563,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.3069,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 1.9117887020111084,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2479,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 4.854358673095703,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3298,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 4.46043586730957,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2984,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 7.219895362854004,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.6064,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 1.80936861038208,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2453,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 2.554882287979126,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1651,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 3.7274348735809326,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.4726,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 6021332957396992.0,
|
190 |
+
"train_loss": 0.38677083015441893,
|
191 |
+
"train_runtime": 198.1718,
|
192 |
+
"train_samples_per_second": 1.009,
|
193 |
+
"train_steps_per_second": 0.252
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 6021332957396992.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0509b8ce6cd3ed751cfef1e396c7179a3f9088099d5cd2cb5b457b319dcebb
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f467a053337fecfa0f847cbc238ac4d4c5e851c9947c49765a915ebdf2da5f08
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71a9446baf72a937c51e8e31dc17acd86d9cf2e6189c9082f6d32dc45a43491c
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7becdb1ad086320eb862ea9500d9bc7b91dd4e237a2a94fcd3b992d0c97c116b
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59ec73292fd4cb63ea34768525c152e88f9998ee0c947f7d860eae16ddd53f7c
|
3 |
+
size 778341034
|