Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fa789cff960ae4ee81fa9ac6a075adcf63e56a3ec6c3ecd5699d4f789f3c213
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:432f8736e99ab8b1e39223df1d48f017f6fd4f4f9643ca359d4b99141e4f4d74
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41c8fc312b07e6987f1125962ff398f8861f24936ce222a149189568327cac67
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:051032624bbe04510f16be6fdc206a54f23b25aa871002ca00fef79182e7c513
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d675c4b87e3db30f58b3688f502ecdfd3b852d0a28b2d067cc72ca763b08cd5a
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:381ce3ba71ec63c166839e10280a43a1e9f31dc4d7fa46bef9e6fa158c302e49
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01e8733bb21c24aa296509cc99e8f4f9b3259d52e55bf918ee4c9f835985258f
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89fe3d8b4f7d27dbc7e4251a76c701fd44c4ad0c7fadf149e72d9c82c2e77db3
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 9.423497200012207,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.9426,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 6.242947101593018,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.4941,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 0.23292511701583862,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.0161,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 3.2664003372192383,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0807,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 16.083553314208984,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.5403,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 13.203125,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.3032,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 9.931619644165039,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.4531,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 12.197830200195312,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 2.0777,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 13.418411254882812,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 2.5869,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 7.835398197174072,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.2992,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 0.8039246201515198,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0811,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 7.014680862426758,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 1.1046,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 8.817255973815918,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8185,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 2.18149733543396,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.1982,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 1.396234154701233,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.7965,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 9.15268325805664,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.5244,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 5.599765777587891,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.686,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 11.052386283874512,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.5529,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 2.8186702728271484,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.6692,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 4.434682846069336,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.5903,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 4.227272033691406,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.2824,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 5.939194679260254,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.3968,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 0.5867003798484802,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.393,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 6.319277763366699,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.6232,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 2.46071195602417,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2416,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 3.9734086990356445,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.6859,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 1.4034255743026733,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.5754,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.3656154870986938,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.1789,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 4.575096130371094,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 2.5298,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.9366658329963684,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.1539,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 1.2089002132415771,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.5569,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.383746862411499,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.3214,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.41840168833732605,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.2939,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.619239330291748,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.6163,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 3.0113186836242676,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.5063,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 1.8169399499893188,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.3002,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 4.200991630554199,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.5329,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.631651520729065,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.6569,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.40286803245544434,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 1.0856,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 2.7753989696502686,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.9605,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.5652052164077759,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.3553,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 5.572319984436035,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 1.0,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.4662155210971832,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.1469,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 2.5111005306243896,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.9844,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 2.1454522609710693,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.3873,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 1.0339059829711914,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.2139,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 4.232385158538818,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 2.1749,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 1.8107025623321533,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 1.4807,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 4.099456787109375,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.5927,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 0.4381089210510254,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.2756,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2185214759534592.0,
|
365 |
+
"train_loss": 0.8263820886611939,
|
366 |
+
"train_runtime": 70.7407,
|
367 |
+
"train_samples_per_second": 5.654,
|
368 |
+
"train_steps_per_second": 1.414
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2185214759534592.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ca00ad56c9180596acb64d02ed1c2c93b5167313f4ddacae17fe6151ab9c155
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5292459abad60fd048050fb5779edeaa54e68fd64f6d8f672e54415ffd378568
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c51878f3c0c54cdc54eb26bf35a2757735003b92f7da1689081ec193e89b87b
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95a9f87437d27a76a6cac22a0d23d1b90a201e43d7c601b4f2178ea763b5880d
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d126b766b20e0cad09d67d67cb29db4c25a011f24be748b251d8b5cf6288b90a
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0ad85bffca2bf517e6a2294ecc419a949579749fae404a75f6f3de58d5c4be5
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e1e47d59fb16d75680a1f95491ce48c1a01605d42bde715afab635cd1fd4fc4
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8eb669eede9faee34f088d7edbeb9c774a119de291ab6f2ebf3c071d1346dc1
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 0.06926380842924118,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0983,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 0.14224855601787567,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5132,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.1999995708465576,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3034,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.366185188293457,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0975,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 0.047930456697940826,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0054,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.5389022827148438,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1265,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 4.9087395668029785,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3968,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 0.5400022864341736,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.2126,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 0.17520087957382202,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0546,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 0.15257954597473145,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0091,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 0.7264464497566223,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1006,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 0.2980533242225647,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0133,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 0.862314760684967,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0667,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 0.18892519176006317,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0091,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 0.21099655330181122,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.0126,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 0.5386524200439453,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0495,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 4.457860946655273,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.2528,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 0.006588024087250233,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0005,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 0.25489550828933716,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0122,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 1.0599042177200317,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.0402,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 4.65377950668335,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.1825,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 5.81795072555542,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.5606,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 0.06573299318552017,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0047,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.15902456641197205,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0069,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 0.09095041453838348,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0076,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 0.03288710489869118,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.1018,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.8101238012313843,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.0885,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 0.018732983618974686,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.0025,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 0.014856848865747452,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.177,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.006819794420152903,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.0792,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 0.015503552742302418,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.0177,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 0.02056262083351612,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.0141,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.32354819774627686,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.0082,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.28828832507133484,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.0255,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 0.011394195258617401,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.002,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.08238676935434341,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.004,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 1.0231951475143433,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.0829,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.0049722520634531975,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.0011,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.5577210187911987,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.1744,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 0.060515161603689194,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.0044,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.24229663610458374,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.0266,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 0.003239632351323962,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.0009,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.019305258989334106,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.5092,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.03198835998773575,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.0061,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 0.4420487582683563,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0412,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 8.323692321777344,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.6126,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 0.013138143345713615,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.0034,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.41053083539009094,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.0296,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.00738116167485714,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.0008,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 0.04163911193609238,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.0028,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2202644449853440.0,
|
365 |
+
"train_loss": 0.10307437002658844,
|
366 |
+
"train_runtime": 69.5445,
|
367 |
+
"train_samples_per_second": 5.752,
|
368 |
+
"train_steps_per_second": 1.438
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2202644449853440.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:475ad4672ec3aaec5117fffb04c560d3ac5d8223d0494c2bc0036bf8aeeb4787
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2073eda13fa87d39360d6fd49065146220dc0190e1004c319eb392aee6a48546
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd0388c3619655d1858ead432f4146aae463cdfad671f284a6b5fdff3e9e119e
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57bc54c8c645bd8a7a7d98994d2b745350a66ffc36b63c1b3485be984ab5a62c
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53ef0ae39d317134427d6d59afb8c58f3a573b7b4c9b00a837033c99411b1a27
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:333195ee6334bf482e3581bf003c5a1b2bdfbda17a5bce80a1c5e88263b3d4a6
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:309ab816121588cffe22b87774005c82c50e3940c8226ade89e6c6edc2ed71f9
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4706bf42b3020bbe7065ab079de8559f0a6a271f8b90c7dc1a72ba00aaed5e7e
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 0.42672601342201233,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1273,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 3.3164169788360596,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.0253,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 1.0701144933700562,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.1081,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 3.5641934871673584,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.917,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 2.482414960861206,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.4307,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 3.699124336242676,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.2775,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 0.19477766752243042,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2769,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 0.49098673462867737,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.227,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 4.419643402099609,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.6207,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 2.067509412765503,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.6495,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 0.7661958336830139,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1506,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 4.098026752471924,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.6787,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 3.8598406314849854,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.2822,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 2.6059610843658447,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4346,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 0.4412726163864136,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.2262,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 1.919790506362915,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3238,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 2.1158552169799805,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.7178,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 1.9022879600524902,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.2602,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 1.8349590301513672,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.679,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 5.065424919128418,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.0498,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 2.6241636276245117,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.824,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.375793218612671,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.1406,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 4.631248950958252,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9059,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 0.9230762124061584,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3381,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 3.146935224533081,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.3658,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 1.643314242362976,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.4425,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 1.577388048171997,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.6525,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 1.5418776273727417,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.545,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 1.3298715353012085,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.2741,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 0.20194341242313385,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.5407,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 1.6116507053375244,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.6341,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 1.1075984239578247,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.5064,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 0.6221591234207153,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.2389,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 1.730831503868103,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.353,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.8647536039352417,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.9568,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 0.6666922569274902,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.9053,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.4998483657836914,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.0881,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 0.8337864279747009,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.3308,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 0.7664095759391785,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.144,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 3.212735176086426,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.9583,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 4.194940567016602,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 2.3249,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 0.21294273436069489,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.0391,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 1.9137557744979858,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.6782,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 1.2017444372177124,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.5466,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 3.704470157623291,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 1.9585,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 0.09351445734500885,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.1403,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 1.1362736225128174,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.4254,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.6755289435386658,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.2894,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 3.5850534439086914,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.7788,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 0.5499467849731445,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 0.16,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 5196654105853952.0,
|
365 |
+
"train_loss": 0.6389807415008545,
|
366 |
+
"train_runtime": 132.4801,
|
367 |
+
"train_samples_per_second": 3.019,
|
368 |
+
"train_steps_per_second": 0.755
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 5196654105853952.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3137dc1711718282da550f4845cd36baab7c46bbf11b76105eef0fead3e874c1
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:defed3f7caba8d08a8444293265279252bac754f91960c09be90e83e3f42290f
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10e379d1ddbbac4869c911e4aa2261e7e062f1759e7a20a5e7ffe738d3b4ece2
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:611b55b9aafd1b4e11c27bcf80a22e33756eef1bb1c1de102cf91676dd074463
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5576200561a80efa43148ce2e7831d31afb0e4696dd074723a8f63eea37962a3
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5b02587ad1dd9f38df651a6f97486fbd2a5654af5e637c8fcaba6350c37ba07
|
3 |
+
size 360880622
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:594714875d0223582d0341af340f41258ef6b01e6633ed2f1b31835f1fbcb28f
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9acc6f735922a0d67535d40544015c6d4cd1d2d4a9d1702e6bd5541a29358dce
|
3 |
+
size 360880106
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 11.701602935791016,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 3.2041,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 1.8877856731414795,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.8482,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 3.9416866302490234,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.9832,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 8.076401710510254,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.7619,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 9.02495288848877,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.1751,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 6.70341682434082,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.7742,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 9.935924530029297,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 2.1147,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 6.356525421142578,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.8387,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 4.266024112701416,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4111,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 9.628840446472168,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.1686,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 6.305365562438965,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.5419,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 3.4979939460754395,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 1.4611,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 1.904814600944519,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.5461,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 5.557872772216797,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4092,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 3.297398567199707,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.5477,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 14.574881553649902,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 1.0394,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 2.348663806915283,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 2.8679,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 8.440045356750488,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 2.2365,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 6.058783531188965,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.8523,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 5.3567728996276855,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.1006,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 15.160209655761719,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.5179,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 10.718804359436035,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.9688,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.9706368446350098,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.554,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 4.072225093841553,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.1259,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 4.832661151885986,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.2787,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 4.9963531494140625,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 2.5343,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.30591192841529846,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.7795,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 3.4619483947753906,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.957,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 1.5531309843063354,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.753,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 3.0114293098449707,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.3273,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 2.4710679054260254,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.9157,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 2.4386227130889893,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 1.0244,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 2.135855197906494,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.8826,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 2.907819986343384,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 1.173,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 2.032712459564209,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.9032,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 2.388005495071411,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 1.9712,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 3.9849488735198975,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.2745,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 7.177330493927002,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 1.5408,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.1500422954559326,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.589,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 3.8292155265808105,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.8088,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.9888606667518616,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.1827,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 2.494753122329712,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 1.0098,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 1.3061927556991577,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.1736,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 1.5556581020355225,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.6252,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 1.5772370100021362,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.7691,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 4.533968448638916,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.5719,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 2.4935195446014404,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.7637,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 2.4782304763793945,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.3888,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 2.847276210784912,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 1.1708,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 3.366889715194702,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 1.3215,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 2232466265866240.0,
|
365 |
+
"train_loss": 1.15478422164917,
|
366 |
+
"train_runtime": 72.3437,
|
367 |
+
"train_samples_per_second": 5.529,
|
368 |
+
"train_steps_per_second": 1.382
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 2232466265866240.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93f65beb7bbd25136510c6187fc11e9ca55eaedcabba3b14033692495c02899c
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaeba29721fb62a98ac7f919a031d53e300c961e5a7541ddce722693348166e5
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec7698a9fcb5af2885504255c07c54acce127c28f1a81ddf7f9850673e0719ef
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d090f3e1e20b1615cc8945e72b93472544277b7189ef83362446f27ba8a0c30
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:591958c16e67ee2f5bf6f1fd0f51c4455c7585e8e45468af114daff5756e3268
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f300b973c4d938700cfca9fae9a52cf471ee38d054e0060f6d88b1dc551cfa20
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1db0a6df166d5e9f5df8f18244a5ccc530772bebf1d2da9d366e863e05f2618d
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8104378821d6474f3a6c351716ffb4f1db29be362a71d37564a8be300b60419
|
3 |
+
size 778341034
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.02,
|
13 |
+
"grad_norm": 0.5440601110458374,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3859,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.04,
|
20 |
+
"grad_norm": 2.486060380935669,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.8744,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.06,
|
27 |
+
"grad_norm": 1.5172414779663086,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.0364,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08,
|
34 |
+
"grad_norm": 1.87302565574646,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.6833,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1,
|
41 |
+
"grad_norm": 1.2508922815322876,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.5988,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12,
|
48 |
+
"grad_norm": 2.049877643585205,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2546,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14,
|
55 |
+
"grad_norm": 3.000075101852417,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 1.0645,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16,
|
62 |
+
"grad_norm": 4.159180641174316,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.0189,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18,
|
69 |
+
"grad_norm": 1.9412723779678345,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5621,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.2,
|
76 |
+
"grad_norm": 2.991361141204834,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.406,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.22,
|
83 |
+
"grad_norm": 1.989998698234558,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 2.1827,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24,
|
90 |
+
"grad_norm": 1.5404460430145264,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.9485,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26,
|
97 |
+
"grad_norm": 1.243625521659851,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.4014,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28,
|
104 |
+
"grad_norm": 2.017669200897217,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 1.1306,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.3,
|
111 |
+
"grad_norm": 2.773820638656616,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.9518,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32,
|
118 |
+
"grad_norm": 3.3499462604522705,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.8406,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.34,
|
125 |
+
"grad_norm": 1.1517057418823242,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5119,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.36,
|
132 |
+
"grad_norm": 3.779822826385498,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.2535,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.38,
|
139 |
+
"grad_norm": 1.4071242809295654,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.6125,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.4,
|
146 |
+
"grad_norm": 2.478247880935669,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.1099,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.42,
|
153 |
+
"grad_norm": 2.338315725326538,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.0911,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.44,
|
160 |
+
"grad_norm": 1.6824209690093994,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.7209,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.46,
|
167 |
+
"grad_norm": 3.2349324226379395,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9425,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.48,
|
174 |
+
"grad_norm": 2.572331666946411,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.6688,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5,
|
181 |
+
"grad_norm": 2.569999933242798,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.6548,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.52,
|
188 |
+
"grad_norm": 0.5937522053718567,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.0816,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.54,
|
195 |
+
"grad_norm": 0.9653764367103577,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.4465,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.56,
|
202 |
+
"grad_norm": 0.6579816937446594,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.2058,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.58,
|
209 |
+
"grad_norm": 1.3638180494308472,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.7284,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6,
|
216 |
+
"grad_norm": 1.4289367198944092,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.8351,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.62,
|
223 |
+
"grad_norm": 0.7946614027023315,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.6298,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.64,
|
230 |
+
"grad_norm": 1.661765694618225,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.8354,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.66,
|
237 |
+
"grad_norm": 2.3677399158477783,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.0779,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.68,
|
244 |
+
"grad_norm": 0.8689314723014832,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.532,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7,
|
251 |
+
"grad_norm": 1.7545336484909058,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.8644,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.72,
|
258 |
+
"grad_norm": 1.1046079397201538,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.4699,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.74,
|
265 |
+
"grad_norm": 0.21095605194568634,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.593,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.76,
|
272 |
+
"grad_norm": 1.4656115770339966,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 1.3938,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.78,
|
279 |
+
"grad_norm": 1.224704384803772,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.7324,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8,
|
286 |
+
"grad_norm": 1.486828088760376,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.5396,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.82,
|
293 |
+
"grad_norm": 0.9084728956222534,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.554,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.84,
|
300 |
+
"grad_norm": 0.9191360473632812,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.6183,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.86,
|
307 |
+
"grad_norm": 0.7869934439659119,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.3396,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.88,
|
314 |
+
"grad_norm": 0.42631539702415466,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.401,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9,
|
321 |
+
"grad_norm": 1.4716026782989502,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.8724,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.92,
|
328 |
+
"grad_norm": 1.6058681011199951,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.5681,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.94,
|
335 |
+
"grad_norm": 1.3189148902893066,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.7323,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.96,
|
342 |
+
"grad_norm": 0.9830902814865112,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.6886,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.98,
|
349 |
+
"grad_norm": 0.8136429190635681,
|
350 |
+
"learning_rate": 2e-05,
|
351 |
+
"loss": 0.2411,
|
352 |
+
"step": 98
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 1.0,
|
356 |
+
"grad_norm": 3.1793293952941895,
|
357 |
+
"learning_rate": 2e-05,
|
358 |
+
"loss": 1.0366,
|
359 |
+
"step": 100
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 1.0,
|
363 |
+
"step": 100,
|
364 |
+
"total_flos": 6021565481222144.0,
|
365 |
+
"train_loss": 0.7584884357452393,
|
366 |
+
"train_runtime": 150.4749,
|
367 |
+
"train_samples_per_second": 2.658,
|
368 |
+
"train_steps_per_second": 0.665
|
369 |
+
}
|
370 |
+
],
|
371 |
+
"logging_steps": 2,
|
372 |
+
"max_steps": 100,
|
373 |
+
"num_input_tokens_seen": 0,
|
374 |
+
"num_train_epochs": 1,
|
375 |
+
"save_steps": 500,
|
376 |
+
"stateful_callbacks": {
|
377 |
+
"TrainerControl": {
|
378 |
+
"args": {
|
379 |
+
"should_epoch_stop": false,
|
380 |
+
"should_evaluate": false,
|
381 |
+
"should_log": false,
|
382 |
+
"should_save": false,
|
383 |
+
"should_training_stop": false
|
384 |
+
},
|
385 |
+
"attributes": {}
|
386 |
+
}
|
387 |
+
},
|
388 |
+
"total_flos": 6021565481222144.0,
|
389 |
+
"train_batch_size": 1,
|
390 |
+
"trial_name": null,
|
391 |
+
"trial_params": null
|
392 |
+
}
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b224a628b3734d5758dbf6796aed6dd04a1b1d0d81b99063131a5f87f032729
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa0b171c9c908292b73b092761beef17fde357b8526a9689a4c1a431bef82d5b
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef71ba45a93d892d931909dd81bac80459da02b0bf561f2c594f640c6c01992a
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78b990fbebb884361e12c185520abbd6ce9ae62e5e4f4903c4863f11b8a265cc
|
3 |
+
size 778341886
|
client_states_feddpa_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc9b6db8edd59dbbd02b347056aa744f332dd711a32e98dd04fd3259d4cf4c56
|
3 |
+
size 778341034
|