Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bf1bd922105b202c1332673b2650c3de98e3475fb786f5b8afb11368e5b6ecb
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7719e20ac179b3505bc16fe3d26557236f289d0aafd2a963fe447a9af6798b3
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c93ab2b8d890ac3fe7e1ea0de67b9123a0488d5bd7cb7109ea6c97edcf22c3d
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d18901eb260c181d30c1e1be564dd71da17a34f6bf6c333a89856341a4b907c
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85c6f07499b6d135cb2c16ce612f45f129974177654fe29012b34255fd4e71ec
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cf6d5f3802665484ccd7c41c90f01c08c8eb716f3bee52a676c0a2ea8f4cafa
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef8a5ad5233a4d504fc5a5368036b3332e83946a37160464ca568222cafaecdf
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:766c6490fd2b03599de1b54601d6a6664ef9260808f2c2b8c964dff572c8a955
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 3.284128189086914,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0793,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 3.4005775451660156,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0915,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 1.777352213859558,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.043,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 4.783378601074219,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.8805,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 7.2065749168396,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.3896,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 1.6766301393508911,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6725,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 15.141353607177734,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.6133,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 17.0766544342041,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.8163,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 9.064810752868652,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6379,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 11.561620712280273,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.3389,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 10.447920799255371,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1638,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 7.2391462326049805,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.4152,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 6.883174896240234,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.1845,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 2.764719247817993,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.2519,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 6.025578498840332,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1062,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 12.170540809631348,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.6212,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 7.470730781555176,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1638,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 11.806479454040527,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.8462,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 1.250801920890808,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1253,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 12.122519493103027,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.3373,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 3.9410598278045654,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0798,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 3.405806064605713,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1707,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 29.996286392211914,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9968,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 5.970231533050537,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.2291,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.9755194187164307,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2356,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2184626743279616.0,
|
190 |
+
"train_loss": 0.3796170651912689,
|
191 |
+
"train_runtime": 115.2896,
|
192 |
+
"train_samples_per_second": 1.735,
|
193 |
+
"train_steps_per_second": 0.434
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2184626743279616.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6011d6315d2580827738242cee8ef75d254b4cea2dd8679d0473b7a9db54cc9f
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad508db3f5914917b8e2316d601bde72f9dd11c3f02fda6fd948437630f223e8
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d5a375f55d8fb1c9f9b8b9d71e2f41433471b22a3f71c7c0ece807ff8703d3b
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0dbffa1ada429cae02a98572d4793e4f33e169588a8884543c9edfa076d3016
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4840c34ce37f38ac86c58160329347c4f31b8a0d2ca8c6f126c58a50979f8da0
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40a6e5dae1e8232e195fdc1aedc64fecf22d65266fc23ce9c8416e8884fb3793
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60bd84c9ce839d36a98d585b4eb739425ac0dda9019d51f2c95a11922d6e2fc0
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:778f95e12407ad50b015c33069c9a39c1140bc5a2a856c0e9f7fff2badc47c7a
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 1.1501377820968628,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0192,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.30648428201675415,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0154,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.0933217778801918,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1095,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 5.344268321990967,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0769,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.127670556306839,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0712,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 0.12052742391824722,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0189,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 0.04328983649611473,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0113,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.04012615233659744,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0542,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 0.18714116513729095,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0121,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 3.1324093341827393,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0418,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.05844057723879814,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0108,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 6.132259368896484,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.3944,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 0.0780622586607933,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0122,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 34.591758728027344,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.6219,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 0.04694774001836777,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0107,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.03775114193558693,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0645,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 0.4576091468334198,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.0172,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 3.4563567638397217,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0723,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 3.871311902999878,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1694,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.24964451789855957,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1908,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 0.06994156539440155,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0383,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.0806451290845871,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0183,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 4.901146411895752,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.1231,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 0.1762755960226059,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0135,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 5.6551384925842285,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0321,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2202126654636032.0,
|
190 |
+
"train_loss": 0.0887929368019104,
|
191 |
+
"train_runtime": 116.0987,
|
192 |
+
"train_samples_per_second": 1.723,
|
193 |
+
"train_steps_per_second": 0.431
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2202126654636032.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78ac4e1058836b6234379b6e570fa3e71002444f037f47c23c6074c38371cca6
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c927ff8e1b3b4faaa34bde0c2692889ce1c086425706d7280a79dc051533dee
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f95ac4a906febc91c5cdb3813cde6eb88cfe3bc22ba6fb8af96611171ca1a688
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e262c5e68c4341c4604cab13775efef4ca2fa56dcb00f1b4eb61a6aac7db31c
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86da56ef2fcb0770168449eb48fb8251868ad508baf093591f10e001ac71632
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00580e00a33f9296ac0a8c5158f4c03322b348cde61b769eaa5cd7738ec10606
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83d7537fc2c48cf1786a1ee004cc1881ef12160aaa0d2cff908db07aa3085b32
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bbb7ca95f52ed9b394dbb71a9734c8caa2eb2cf282d47f60c1fa00311a262ed
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 3.62923264503479,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.2634,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 2.1570146083831787,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2137,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 4.34098482131958,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.5009,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 9.01052474975586,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.2679,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 4.558963298797607,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.6763,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 5.2403740882873535,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.631,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 0.49892091751098633,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.321,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 4.3842668533325195,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5071,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 3.5485029220581055,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2333,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 8.232368469238281,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4089,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 1.4529104232788086,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.265,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 3.8828446865081787,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2181,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 1.24514901638031,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3733,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 9.44221305847168,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4971,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 2.5544800758361816,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.3479,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 7.6183319091796875,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2933,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 5.657953262329102,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.451,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 1.417629361152649,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.1971,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 2.1960906982421875,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3496,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 4.147959232330322,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.8169,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 1.6373600959777832,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2783,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 1.473294973373413,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2325,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 4.739127159118652,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.3456,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 1.90013587474823,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1663,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 5.234428405761719,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3906,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5202848019120128.0,
|
190 |
+
"train_loss": 0.36984447479248045,
|
191 |
+
"train_runtime": 190.6891,
|
192 |
+
"train_samples_per_second": 1.049,
|
193 |
+
"train_steps_per_second": 0.262
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5202848019120128.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d13906ad1f936437fd1ca3ff97f1ba801c71be2b6f272adbb73ae8ab17b6e26e
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38308af0d31e928983ecb45207689c09907a4de47c8986628f7604409ef92085
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71a124f96018f2b50d800a1d7250ca46a03878d242a4c911ada9f11011f69b97
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fc1ef0d486b875990200b1dd094d84b2570ed11e53f6a9ee10f35d34f0f49ca
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb4e079328106d0413c0e0836461aaaa74ddb45de0b53648b630bdc6a6632418
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7865e64171462c37128ea3d65f6aa1f8d3f8ad1fae133ed26642947ec64cc251
|
3 |
+
size 360880622
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4045eac0ab4825692e2c1f2596ed0574497bab56ebb98680bbfa7a3d0cc80c1
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1db6ddaba6447528d3500e93c52fc8187a5fd82d3824af797deebd9999a4d9ac
|
3 |
+
size 360880106
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 7.067582607269287,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3825,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 4.283020973205566,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.2845,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 6.822042465209961,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.7916,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 9.666316986083984,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3412,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 16.465579986572266,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.8459,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 6.560152053833008,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6335,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 4.862478733062744,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.6741,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 7.447726249694824,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5064,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 7.4205098152160645,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.9029,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 15.627077102661133,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.8091,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 16.306581497192383,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.7351,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 4.587651252746582,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.1841,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 19.92629623413086,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.5481,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 3.447114944458008,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.3892,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 14.376614570617676,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6816,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 19.715835571289062,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.6575,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 17.579479217529297,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.8478,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 8.443402290344238,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.2789,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 2.802513599395752,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3138,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 8.755653381347656,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.7962,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 11.076048851013184,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.6601,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 9.736947059631348,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.9945,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 9.903741836547852,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.6868,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 26.1480655670166,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.0493,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 10.546011924743652,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.8659,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2231176135704576.0,
|
190 |
+
"train_loss": 0.6744278335571289,
|
191 |
+
"train_runtime": 114.3015,
|
192 |
+
"train_samples_per_second": 1.75,
|
193 |
+
"train_steps_per_second": 0.437
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2231176135704576.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:169b161921bb01bf190d8f33849b33f9c52e7d430415f5e5a702607ff51d21b0
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31f3654c2de0c19271419c744626b570b8ee010b77749be74aa4c396bf6a43bc
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91f9067a5bd4fbe9beae979d71170b2c1e372c67acd5023c3e986430948e2719
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b889c9885f7c81ae50f155aebf41c60c283c525132b24699311c6c6f7f12500a
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb7771d615a3c16f065855527c013f6e19250c242746206ff6b6a409e79a45f3
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c87725e139a42e875b6819300a7f140a858cd2d2db35b382faba6ce53d8e3d7
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a336edf31a143a41bf64944c4e3b5afa7f26ca65509fd134b08f280f6dfc3e35
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ab51c825d382c8d805b7a1989808541ebe02284dfc053fd0859047835f96957
|
3 |
+
size 778341034
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 2.316213369369507,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.4539,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 3.5787057876586914,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5031,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 3.0300209522247314,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.514,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 3.986267328262329,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4123,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 5.813876152038574,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.5409,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 1.9155079126358032,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3129,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 1.6175196170806885,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2733,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 3.6581971645355225,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.3098,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 3.621964931488037,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4081,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 2.364119052886963,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1815,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 4.366921424865723,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.7291,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 6.259946823120117,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5821,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 3.531437635421753,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5318,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 9.066308975219727,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.9163,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.3480992317199707,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.4263,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 2.767130136489868,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2811,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 5.080353736877441,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.3363,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 1.7336921691894531,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5843,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 2.0092973709106445,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.4642,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 3.8756134510040283,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4807,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 2.4722533226013184,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.3807,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 1.6810188293457031,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.3137,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 3.1720468997955322,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4738,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 1.5610110759735107,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.2826,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 2.7629005908966064,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.554,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 6020445073571840.0,
|
190 |
+
"train_loss": 0.4498666000366211,
|
191 |
+
"train_runtime": 191.3091,
|
192 |
+
"train_samples_per_second": 1.045,
|
193 |
+
"train_steps_per_second": 0.261
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 6020445073571840.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04ff9214659409561dfbaa016ea376607a875853b6d1a4c0b5696d32eaa6435b
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d40603d57ef7a099d0a49f40d6c03d0849f0e70990ada3d6564746d8a047708
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0d185e922421d0f321483a04cf9ab247f33a88bae7fe6478e14d587ec62b615
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7676ca765d23fc6a1f0a8ab3064c4017ae1ec8dccfe4446023c38f944052a7f
|
3 |
+
size 778341886
|
client_states_ditto_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35e47f376b87f03095f830e87fa9eb4f84f5a0695a2984230753c25d7ad5a96b
|
3 |
+
size 778341034
|