Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
- client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d15399f0edd1f91b4a2f47beace1738d4cf8d5dfaf2b45b7305d79b9bbfb21d
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:428d26a3c9c1a279a65ec5b0271ef9313bcd1f8fbe7136f6fb36e13a611444be
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05cb0bc452be8c9b338be41388d1f2439111df0c7b8092f1ba38101984a33837
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7b33073b2d4e01b918ce6d556504297b87242a7269ecd544dd4a98da48dc7b7
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f18e59a96390eb2de1672fb33aea5d2558b0f3efc4ddd9f390822d557c52c42
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cba7ce9936a84aad1f88255fc48671eccf33c457c95b6877beedce99407c7636
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ca8e4bf48ae2c4fadc9a0324efef2e5fec14ec7cfeeccc0ac29b6813d8bb1b1
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54258bf82a4d6ed0bdd02ff79c8dce338184fbab57262563baae4a52792eb45c
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 20.356369018554688,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.581,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 8.037093162536621,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.3628,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 9.684240341186523,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4963,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 13.099488258361816,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5486,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 24.806184768676758,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.2746,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 12.736031532287598,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.4477,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 8.170167922973633,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2535,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 6.432157039642334,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1452,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 2.9350321292877197,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4796,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 5.1196184158325195,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.5928,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 8.371356964111328,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.5357,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 10.118005752563477,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5303,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 8.173060417175293,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5055,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 8.020771980285645,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.2534,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 3.0385186672210693,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.3931,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 6.484845161437988,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5293,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 3.5226542949676514,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1518,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 3.8682730197906494,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.1667,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 15.682740211486816,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.9048,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 3.051661252975464,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.2347,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 4.373608112335205,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.8616,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 1.7194321155548096,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1839,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 8.273662567138672,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2419,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 4.200174808502197,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1707,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 4.162522315979004,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.4122,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2184907602264064.0,
|
190 |
+
"train_loss": 0.530304090976715,
|
191 |
+
"train_runtime": 112.0178,
|
192 |
+
"train_samples_per_second": 1.785,
|
193 |
+
"train_steps_per_second": 0.446
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2184907602264064.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1e0290f254e1384dc1b37b17cd1a2fb8a4a8bf7a62e1995251d0c059fe3036a
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6f460736d1f5f9bcd282f631bd6fb842444343cf3d88d95d4f800b704aa5da1
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01463999f5efe8d57300fee5391f52db0442384ea286c5d9d33b02743486a83d
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c68012f74213cbac7a5c1607e2e1e0f117a9c05b2885cc5b05407538fb77bc2f
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e3fce46215518ed15bbadbfadf2e2c58fe601c749ff93d6fcb857c43f5208f1
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4158de6adb3088e4e92a8742807ff01855816aa9185bd87b5850e634ea97bb34
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4152f5720d2ccf17af9d3d343bd5f35a6af271f91a7685a8af2139b6784fdfdc
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55a2902e50a5d007c58fc2031cd980d500510dba91d1f562e637011185bec243
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 1.7138079404830933,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1539,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.025427162647247314,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0094,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 0.03731539845466614,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.0288,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 4.670400619506836,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0634,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 0.008197980001568794,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0131,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 1.292601466178894,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0206,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 1.6769487857818604,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2863,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.16871647536754608,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0143,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 16.038557052612305,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2802,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 1.5586574077606201,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0268,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.052973054349422455,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.013,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 0.4838956296443939,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0118,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 2.2010490894317627,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0197,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.02648143470287323,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0095,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 0.015569723211228848,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1625,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 0.07271519303321838,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0134,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 12.36640739440918,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.1975,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 0.011994317173957825,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0186,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 0.07830987125635147,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0099,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 0.245117649435997,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0106,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 0.09306483715772629,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0116,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 1.065843105316162,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0152,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 0.019034242257475853,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0096,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 0.014849173836410046,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0973,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 0.41532012820243835,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0126,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2203723937873920.0,
|
190 |
+
"train_loss": 0.060387180894613264,
|
191 |
+
"train_runtime": 113.8328,
|
192 |
+
"train_samples_per_second": 1.757,
|
193 |
+
"train_steps_per_second": 0.439
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2203723937873920.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55e9d3fd73bbdcad52830e269e2f3893da64867291b0aee9a0e4955d33c75627
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a85b9f1aa53edaad8c7a91bca1a3c70f20eb871f8dd521b068458c7141fc76c
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87a567a7777a1278ff501e2a4345b52e9eb9aeb5e0a651e43d5f8898fd830e16
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8291d8f62e0d8059d91b1002ed6f1e1830c888064a6e305bc691db4e3a7c5be
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d467059117666b4bb77ae3dfce1433137ba25364531e1f1648c6de90643de6cd
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c86f50e0f4809ed2809088768d554284ed9990868438e53b93eed85ac993822a
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0199d3ec64caff6ce480c8099992a2541bbee8a3b53ba12c681a7a19ab123831
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b7717a34391c3b362458f2148b4270e4a5682bd69eda667d4aa4d3b9e4ad72e
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 8.67927360534668,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.5073,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 1.070449709892273,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0556,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 5.557805061340332,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.179,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 5.5926971435546875,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4056,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 3.1736576557159424,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.137,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 8.38901424407959,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3255,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 2.2440390586853027,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.4894,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 2.3557586669921875,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.424,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 2.672835350036621,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.272,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 2.5702593326568604,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.2785,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 0.3932490646839142,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0989,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 4.769493579864502,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.254,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 5.8169331550598145,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3336,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 0.9339334964752197,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.1087,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 4.087733745574951,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1163,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 7.105349063873291,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7171,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 2.8530874252319336,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.2644,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 6.338785648345947,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5313,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 5.0571136474609375,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1923,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 7.593774795532227,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4613,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 11.923439979553223,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.7395,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 0.8395790457725525,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.0462,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 3.3043808937072754,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2629,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 9.646453857421875,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3714,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 4.387805461883545,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2219,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 5203228526379008.0,
|
190 |
+
"train_loss": 0.31173837661743165,
|
191 |
+
"train_runtime": 189.427,
|
192 |
+
"train_samples_per_second": 1.056,
|
193 |
+
"train_steps_per_second": 0.264
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 5203228526379008.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f4954490d41567c600c766cb6260cf6575e0faa298da32365174a9be869b521
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5e9244b1cfbf88b52befc18b724e7cd671e4eb407aabd8884118c9e9e5212dd
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b6c22f2e8e6010de5e540eed7cd6d9544224ba58ebc34ec3b66db8fccdc0a6d
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67f6953201fb2a4dde70231b9bc251dcbcc1777c95f3625e0d02a3b94a895caa
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d843fd5a0b30e5d2423185c7f7c1087228c7ae93460fd1db50dff217bfa5ea9
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e46d20265302b91f3f24e0d48d23aeb7fa6a7185fc4e7b7ad9e7e02243bed549
|
3 |
+
size 360880622
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:975b40293671bbdeb5d32200b2bd64f0890718dc69b9e7162e2fb26a36afe6a1
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d853326567172e3fdbcb84b8d3f5e7c1f022f8b7bc03a0334be7fcdcf3bab33
|
3 |
+
size 360880106
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 18.76683235168457,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.0907,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 12.490086555480957,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.7188,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 15.25229549407959,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.6253,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 22.94184684753418,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.2405,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 5.571078777313232,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.389,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 9.62364387512207,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.7317,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 7.308901309967041,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.5968,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 4.2408223152160645,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.5853,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 8.469420433044434,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6742,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 4.550015926361084,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4569,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 8.425732612609863,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.5865,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 10.20775318145752,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.4898,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 15.613037109375,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8773,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 7.8877410888671875,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.8102,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 11.263969421386719,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.8425,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 10.401885986328125,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.7032,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 8.376611709594727,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.4214,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 3.5057554244995117,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.4259,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 14.423259735107422,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.474,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 5.601011753082275,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4506,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 8.93952465057373,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.795,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 7.401601314544678,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.4657,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 6.547579765319824,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.524,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 4.996316909790039,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.8237,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 14.771405220031738,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.9451,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 2233493086011392.0,
|
190 |
+
"train_loss": 0.6697625303268433,
|
191 |
+
"train_runtime": 113.9082,
|
192 |
+
"train_samples_per_second": 1.756,
|
193 |
+
"train_steps_per_second": 0.439
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 2233493086011392.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc8f43e8703563b6486c90073e04b43b429fa3218145830d8a090d468ec437bd
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99730f58c467e3669a4d4713dd9b1451cff4b93a58ae46c46497eb8f731facfd
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6edd260c1245ae17fbbd8bdc254dc7d9c62b4151143c13392a97e053e4bc420f
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ed393604bc7cd9f3ae87cd5c96a132a1a9ee3dba8c55b6a3a0aaa11c23731c2
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a06037709047bff4a95ef960ee58dbdfe9a84bcda95a019105db9e35084c5945
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21ba21d09b33136f5e652150ade2f8ff774b5b30eb0209d0e1b4eccfda6525f8
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5ace8e0b834acde0cd7f0b3c99ab0c668c3019d4f8c61d1e257934541a3bb25
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49d74bc1d4d6cd847082f58e5ab66e88b532413a1e07e096ea389507b3a50d0d
|
3 |
+
size 778341034
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 50,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08,
|
13 |
+
"grad_norm": 0.42625540494918823,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3228,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.16,
|
20 |
+
"grad_norm": 0.7395199537277222,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0953,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"grad_norm": 4.357167720794678,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.342,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.32,
|
34 |
+
"grad_norm": 3.030724048614502,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.453,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.4,
|
41 |
+
"grad_norm": 4.075497150421143,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.2039,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.48,
|
48 |
+
"grad_norm": 3.159764528274536,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.3253,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.56,
|
55 |
+
"grad_norm": 4.061643123626709,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2518,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.64,
|
62 |
+
"grad_norm": 0.8145078420639038,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4027,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.72,
|
69 |
+
"grad_norm": 2.392146348953247,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.1145,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.8,
|
76 |
+
"grad_norm": 4.058164119720459,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4689,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.88,
|
83 |
+
"grad_norm": 1.3680384159088135,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2133,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.96,
|
90 |
+
"grad_norm": 8.989691734313965,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.486,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 1.04,
|
97 |
+
"grad_norm": 8.30005931854248,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.6934,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.12,
|
104 |
+
"grad_norm": 12.295815467834473,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.7844,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.2,
|
111 |
+
"grad_norm": 4.061346530914307,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.2592,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 1.28,
|
118 |
+
"grad_norm": 4.065572738647461,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5118,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 1.36,
|
125 |
+
"grad_norm": 3.6354613304138184,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.472,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.44,
|
132 |
+
"grad_norm": 5.219471454620361,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.413,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 1.52,
|
139 |
+
"grad_norm": 4.679935932159424,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.3027,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 1.6,
|
146 |
+
"grad_norm": 3.4444127082824707,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.4347,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.68,
|
153 |
+
"grad_norm": 2.349905252456665,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2921,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 1.76,
|
160 |
+
"grad_norm": 3.327099323272705,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2939,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.84,
|
167 |
+
"grad_norm": 2.9319257736206055,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.2592,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.92,
|
174 |
+
"grad_norm": 2.7675695419311523,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.4071,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 2.0,
|
181 |
+
"grad_norm": 7.027470588684082,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3092,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 2.0,
|
188 |
+
"step": 50,
|
189 |
+
"total_flos": 6019810878029824.0,
|
190 |
+
"train_loss": 0.3644888877868652,
|
191 |
+
"train_runtime": 192.4302,
|
192 |
+
"train_samples_per_second": 1.039,
|
193 |
+
"train_steps_per_second": 0.26
|
194 |
+
}
|
195 |
+
],
|
196 |
+
"logging_steps": 2,
|
197 |
+
"max_steps": 50,
|
198 |
+
"num_input_tokens_seen": 0,
|
199 |
+
"num_train_epochs": 1,
|
200 |
+
"save_steps": 500,
|
201 |
+
"stateful_callbacks": {
|
202 |
+
"TrainerControl": {
|
203 |
+
"args": {
|
204 |
+
"should_epoch_stop": false,
|
205 |
+
"should_evaluate": false,
|
206 |
+
"should_log": false,
|
207 |
+
"should_save": false,
|
208 |
+
"should_training_stop": false
|
209 |
+
},
|
210 |
+
"attributes": {}
|
211 |
+
}
|
212 |
+
},
|
213 |
+
"total_flos": 6019810878029824.0,
|
214 |
+
"train_batch_size": 1,
|
215 |
+
"trial_name": null,
|
216 |
+
"trial_params": null
|
217 |
+
}
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c46f9b30ec1ddd0705e19e3a27e0ee5ae37bfc4c2d9a99b2d33be8dab34eab79
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0bd3170badc104b1340132038527d6226113ca6eebce39a3f6699be2b0d1e46
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f97139241266520411df7976e108c2040ec519ca76fb4cb87b88ad7696cf6c6
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67fe3a24e6bec05583d408402670efcf642e612602b2bff16363992dfcd4e812
|
3 |
+
size 778341886
|
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eda87c5048819b2eb77ff1681af2797f1098905aae19565a8d5df6a3a59e038c
|
3 |
+
size 778341034
|