Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7187654c3b2b4a311de981775f06ff9aaec06521ba6f2d55fe0cca74a083b1b8
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cb851df31a265321fab4c368129ff8bdfbcfb7e63e72bb40c7b878badc6ba4d
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0571775d806cb5694d12bbbb59deb5715b149b625563380d3b53f0b2c4a0e406
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11bb9051b5c160af184c1a503e926ded05fb4faa720a3b2ac735df443714d0b7
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:242baa74ecd2e89b8949e441e1eb503db5855524efbfd97bc9c9e937ad7f281a
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28f97f86483164a291e4dfb7b1b7323fe209e49380184a1ab9edb5093a58c2c2
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50eb45b2ed832ed4563d161d284a70b3fd463cd5d377c0fe903be291076a7133
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:928ec6d986859b197e14f18c3d237108b5a867442d77ef59f22e5e74a1481437
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/0_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 17.835098266601562,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.2723,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 5.935124397277832,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.3235,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 11.949447631835938,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.6892,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 4.54430627822876,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5368,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 16.603609085083008,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.9215,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 6.623965263366699,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.798,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 5.242029666900635,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3287,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 3.1021950244903564,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1253,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 4.434370517730713,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5437,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 1.40633225440979,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4146,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 5.0599894523620605,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.4193,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 3.8830926418304443,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.4138,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 4.712460517883301,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3751,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 3.2360763549804688,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.1469,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 2.5379559993743896,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.4358,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 8.039641380310059,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3871,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 0.877090334892273,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1099,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 2.0384771823883057,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.2192,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 11.140633583068848,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.8123,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 6.476287364959717,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3829,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 3.3182876110076904,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.6873,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 2.2434990406036377,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2576,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 5.116442680358887,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2816,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 0.7951710820198059,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3147,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.8207076191902161,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3024,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5262842336903168.0,
|
190 |
+
"train_loss": 0.4599846029281616,
|
191 |
+
"train_runtime": 188.7148,
|
192 |
+
"train_samples_per_second": 1.06,
|
193 |
+
"train_steps_per_second": 0.265
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5262842336903168.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a99edaae05995e51b6df8fbbe76020d1d486ec42021f513ad5178460e3caf352
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ea5209a560b65ca173a1db13c7882797c188b5f65cd986de2c86f36a11b818b
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b6bd292c09e6962198a402a9c211d7fac264c0dbd9a6790e5fd52fed5cda0ea
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15284a64a6f8f75bd6b4569811f3589ea26f5f4201e28360866fdc2e53febe13
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5711178c5463f7ab46aa53c7536f6313fea20d64a6ab19d533937da9107ba298
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3e45fd3aced541c26cc0e326088f986e7d84f3d20c3f8178f1de7b27303044c
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4aaa54fadfb0db8bc6921f83398260e95c0c7a12df124a447f18c8a99c608c7
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bf21825f399573276b614a5a9d8af30444b25aa7421f5be03a7df223b109e28
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/1_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 0.446603924036026,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1359,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.10891813039779663,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0153,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.08321043103933334,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.0524,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 1.2470159530639648,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0364,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.0992945060133934,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.082,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 1.0286638736724854,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0263,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 1.4307641983032227,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0744,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.12711121141910553,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0168,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 1.582565426826477,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0624,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 1.4729337692260742,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.048,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.009418491274118423,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0155,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 0.010489478707313538,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0151,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 1.960877537727356,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0259,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.009839626960456371,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.015,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 0.08140977472066879,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0953,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.023913130164146423,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0145,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 3.580852746963501,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1049,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 0.009584111161530018,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0209,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 0.00926352571696043,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0158,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.022018805146217346,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0144,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 0.022947989404201508,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0272,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.031829629093408585,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0144,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 0.00818221177905798,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0142,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 0.017826130613684654,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1101,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.02396584488451481,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0145,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5308165734268928.0,
|
190 |
+
"train_loss": 0.042700646072626115,
|
191 |
+
"train_runtime": 188.7069,
|
192 |
+
"train_samples_per_second": 1.06,
|
193 |
+
"train_steps_per_second": 0.265
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5308165734268928.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a678ec3a1eebf6cd19b4a3a86fcfa17a0a3d1bff7e4b323c0e9b39aab849835d
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:002c0fd0d45d95aab8737555c4d1ab0d6d7268708d58af0a97d03cefe871a06a
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:046b63fd84fdf6ddc23d19f4dfe01cc64d7136a420c811c5306034ed8dff2828
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:776b2874adc5a806bfa2f0a2f117b8262c20d7fe6fd49bc20e85d61f785f03a0
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dc57a934def0e07593f099a111cf66256aa931634f2b19e1d7ab37987939278
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a554f530ff8c965c942e193a07af99983d5d0c26ef275eff9d868203f414b78
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d095e74a0ace44252b160479986b0e2eee7fa4ca6ca4e8022714bf37b8d5a571
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709a8cae47012ee03fc01a7f0662490660e0669cdaf3b30fd4faa18163209de5
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/2_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 7.620423316955566,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.469,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 1.0009398460388184,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0558,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 7.3740949630737305,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.2361,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 5.340544700622559,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.375,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 3.0642282962799072,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1286,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 8.644904136657715,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3327,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 2.4434633255004883,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.492,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 2.4134719371795654,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.452,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 2.4050092697143555,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2468,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 2.544926643371582,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.2707,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.4770296514034271,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.094,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 4.732265949249268,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2453,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 8.252790451049805,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.363,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.9622191786766052,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.1234,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 4.3495564460754395,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1236,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 6.503274440765381,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7048,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 3.046657085418701,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.2559,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 6.2532219886779785,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.4904,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 5.003266334533691,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1991,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 7.434990406036377,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4284,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 11.749560356140137,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.7576,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.8847981095314026,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0488,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 3.2899725437164307,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2683,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 10.162389755249023,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3719,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 3.9631645679473877,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2006,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5203228526379008.0,
|
190 |
+
"train_loss": 0.30934857606887817,
|
191 |
+
"train_runtime": 194.6247,
|
192 |
+
"train_samples_per_second": 1.028,
|
193 |
+
"train_steps_per_second": 0.257
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5203228526379008.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fbb9b3083f4adea56fa40d73ef2d276d34dd1141a1e0b879214fa5297f2a67e
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2d2bbd2320db1c97e9b8c19641cc4013bd3bdbcb441849f2f5b71259205ef67
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5cf9bee6f3298e1ddf6509dc4d30689d7f14c53515c656a5938157a99ce5038
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:847972a0a4cc0969f6079db8ca3e297823e60fdf891791df893b8be17a649f6d
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b735d618d85e4fcdf832aa2fa46fc1704052fa38a48cfffefe5432589d28adf
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:668b35e725756d711bf9548304118bdff091fc57487f08deec840e08de74ef0a
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ff73fbd2d92a8f6232c87ae094040bfb7e7ca7e47656fdcd9bce8f5823f4092
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bbb12f3d11eb2d3337fc2eddfec53dc06daec64b872efa8fc71743ee8239c95
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/3_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 12.495404243469238,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.779,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 6.629724502563477,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 7.101356029510498,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.7022,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 7.535829067230225,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.9247,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 2.729475259780884,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.4091,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 8.636240005493164,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.8681,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 8.66524600982666,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.7492,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 3.2647829055786133,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4108,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 5.925101280212402,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6516,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 3.2308082580566406,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4492,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 4.235790729522705,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.6326,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 4.701728343963623,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.3093,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 8.327908515930176,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5139,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 4.828310966491699,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.6246,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 7.388039588928223,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.8027,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 6.946700096130371,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5852,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 0.9494185447692871,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.3423,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 4.723056316375732,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6249,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 3.943333148956299,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2556,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 2.8990371227264404,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3113,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 5.895607948303223,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.687,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 2.503542184829712,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.5024,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 6.042721748352051,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4905,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 1.758901834487915,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.7268,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 4.603387355804443,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2878,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5379871421235200.0,
|
190 |
+
"train_loss": 0.5656331896781921,
|
191 |
+
"train_runtime": 188.8767,
|
192 |
+
"train_samples_per_second": 1.059,
|
193 |
+
"train_steps_per_second": 0.265
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5379871421235200.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb08e16fa47f8165cdd04de2f96bf67351d5afb5e26d3062daaca4add4cacb6e
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2c7a3ebf6b75ad8d563055649dd9a3c67c241c89d20f465d03c9cea0290283e
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93c7d386f0503d5012cab1569ee8733707f70dba8032da855daf25e67911039b
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4e4be141eba949984993e2c890086131a2d212ae51629882e4cb2e8a7ed944d
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b853c0ef95591cbd7c88b47800c735ac8c4fda60dfd5924f47714cf9d0b9f334
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0114a706b3459ce20699f3ed2de459d76ff071595681598c417000763c3845c
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6038955452646355952c1af52aa0938ec96a5155c97e81eeebbf3385676f6e3d
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dce5d8b262c68011e3e8b47d0940963239afc7c98986a7b48601fd1f7483f41
|
3 |
+
size 778341034
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/4_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 0.3974245488643646,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.319,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.6956784725189209,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0874,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 4.481583118438721,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3417,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 2.996901512145996,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4057,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 3.626215696334839,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1893,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 3.7142386436462402,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3213,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 3.844820737838745,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.252,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.8332847952842712,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4195,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 2.419525623321533,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1124,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 4.048295497894287,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4657,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 1.1390035152435303,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2091,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 9.533462524414062,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5139,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 8.884787559509277,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.72,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 11.820172309875488,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.7309,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.786125421524048,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.2382,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 4.292237758636475,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.516,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 4.905089378356934,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.4936,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 4.8835859298706055,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.425,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 5.083212375640869,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3309,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 3.6019129753112793,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4558,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 2.5178937911987305,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.3123,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 3.1368069648742676,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2818,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 2.9503908157348633,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2565,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 3.0946178436279297,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.4001,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 7.015686988830566,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3228,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 6019810878029824.0,
|
190 |
+
"train_loss": 0.36482719421386717,
|
191 |
+
"train_runtime": 190.5862,
|
192 |
+
"train_samples_per_second": 1.049,
|
193 |
+
"train_steps_per_second": 0.262
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 6019810878029824.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4632c0a7954ad392899f25b025e91ff1a12fb835b79f8cc033da3d88b2763a89
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af08c046b78b082c9a7a6c1b948e2ffa1b866fcde980f8adbe1e06e2b3e767bb
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ae2b9cf0d05969ee3ed4f5adde29f1d459afe52b86afd14e56335252e22841e
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6afa1969e53cc2e368fc21263bc63819b02f23c15ed64631e61c910144d2eae9
|
3 |
+
size 778341886
|
client_states_ditto_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixit50_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:787b721dcc19ec1b987482642be00562f9b7c59cce7f7870cd1d113cab978fc0
|
3 |
+
size 778341034
|