Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_trainer_state.json +126 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_trainer_state.json +126 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_trainer_state.json +126 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_trainer_state.json +126 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_trainer_state.json +126 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9c8b688e7142618dcaf75b8821e014cceda3e8979fdce5093a9069a4a1a4ec9
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff1a052e82f4eec62a35dd8764b6aad1cbebfe9014a124bb5ba10ef4da7d8f00
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81e8decf5dcc4fb57f4b87504e65019c233bee4c47314f4ea16cb985e230fa53
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f1d5afb7dd124ccb5cc85da7598e84be573f582e7f939d1d62706208f509aba
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45586c6182565a1259c385222e9e0ce6254e0caf263527bc1e7d5a4329d199ca
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e284ece48ddd1a8a3b8dd541af6aa3681f6ea653d2297d78c229020ce04ee233
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3651434d61d8d6f5b342f17dbdf811e02b3e075489657fd6da20fbe7c30e969d
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d59111607207bd02602038a0e259bd8995126bbf701addb0c29168eb26291efb
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 25,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 2.21482253074646,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.254,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.32,
|
20 |
+
"grad_norm": 4.312131881713867,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.1656,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.48,
|
27 |
+
"grad_norm": 1.3984471559524536,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.046,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.64,
|
34 |
+
"grad_norm": 1.9168689250946045,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0957,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.8,
|
41 |
+
"grad_norm": 2.0672812461853027,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.281,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.96,
|
48 |
+
"grad_norm": 0.5915671586990356,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0233,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.12,
|
55 |
+
"grad_norm": 2.9408771991729736,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.4947,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 1.28,
|
62 |
+
"grad_norm": 2.808587074279785,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1121,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 1.44,
|
69 |
+
"grad_norm": 0.9880923628807068,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0909,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 1.6,
|
76 |
+
"grad_norm": 0.9719083905220032,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0377,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.76,
|
83 |
+
"grad_norm": 3.7146573066711426,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.1087,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.92,
|
90 |
+
"grad_norm": 1.7789965867996216,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.2933,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 2.0,
|
97 |
+
"step": 25,
|
98 |
+
"total_flos": 2776472045486080.0,
|
99 |
+
"train_loss": 0.16410446166992188,
|
100 |
+
"train_runtime": 127.3475,
|
101 |
+
"train_samples_per_second": 0.785,
|
102 |
+
"train_steps_per_second": 0.196
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"logging_steps": 2,
|
106 |
+
"max_steps": 25,
|
107 |
+
"num_input_tokens_seen": 0,
|
108 |
+
"num_train_epochs": 1,
|
109 |
+
"save_steps": 500,
|
110 |
+
"stateful_callbacks": {
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": false,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 2776472045486080.0,
|
123 |
+
"train_batch_size": 1,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5d734864daeeef65eb7021dea9335af29f9eb355c5c24bd18f1f4c800053454
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84fb2ded09a75d5375a6580465a16635b5647ea5747b60010cf13c0ff84aad83
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:100f1537486f1307e3311d9dff294c4f3f7ea4855d42a169a52b0478a17f20dc
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:499d6a2dc853680f34ad981ad4ebcc92ade1be79e80bae61d9e21313f4749473
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbdfaa7ec410f0e21e4be7406985d711d6442247f71b2940a42598fede71a62a
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e599b8fca50af010e1b7f3b36515b2d844e07af4e70c799597761b47654e3f7d
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc518d42a7836ec09008231870f9f4a7ed123eaeeeb09ddeaa7890249744b85e
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:717b1af67a9b2cf721e2297314d6294216f3aacd81c35d45554c3b7af1d22e52
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 25,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 0.9727327823638916,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.1592,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.32,
|
20 |
+
"grad_norm": 0.6119529008865356,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0155,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.48,
|
27 |
+
"grad_norm": 0.3699932098388672,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1359,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.64,
|
34 |
+
"grad_norm": 0.8428669571876526,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0421,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.8,
|
41 |
+
"grad_norm": 0.8032307028770447,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0221,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.96,
|
48 |
+
"grad_norm": 3.484509229660034,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.0962,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.12,
|
55 |
+
"grad_norm": 0.48723894357681274,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.0181,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 1.28,
|
62 |
+
"grad_norm": 1.0781177282333374,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.0448,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 1.44,
|
69 |
+
"grad_norm": 2.618180513381958,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0524,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 1.6,
|
76 |
+
"grad_norm": 1.0576213598251343,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.0496,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.76,
|
83 |
+
"grad_norm": 0.34326815605163574,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0519,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.92,
|
90 |
+
"grad_norm": 0.4327654540538788,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0183,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 2.0,
|
97 |
+
"step": 25,
|
98 |
+
"total_flos": 2795254289793024.0,
|
99 |
+
"train_loss": 0.056924142837524415,
|
100 |
+
"train_runtime": 128.058,
|
101 |
+
"train_samples_per_second": 0.781,
|
102 |
+
"train_steps_per_second": 0.195
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"logging_steps": 2,
|
106 |
+
"max_steps": 25,
|
107 |
+
"num_input_tokens_seen": 0,
|
108 |
+
"num_train_epochs": 1,
|
109 |
+
"save_steps": 500,
|
110 |
+
"stateful_callbacks": {
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": false,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 2795254289793024.0,
|
123 |
+
"train_batch_size": 1,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04fecbc85501e3f0cbafe9f13108201d8d0cc03eee280d106fab39629624a568
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900c97291ffa7ede12bdecdf80234c6c599d18f68c242a39b512946e6c2f8cf8
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13ca03010d565b6648c1c2de72ac1971fc3ee336ef441ce2e2aed880590d3b3a
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a1389dff1c58aca0343be04ac4ada1f12834b18b9ba70342bcb3bb424deb3eb
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53b54b947a97e393bb76780d4174ca2bbe266b6d5e26d162c4f4a73a0797ac57
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adb0272b9ee55de14b7c86ea70901588d1a894e126974d841e223f610b306e7a
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c44442470cf3659d87e0e4d6a63a497b9fbf71c54dce14abe746671e4579c02e
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e9064a8ac3feffdefe6bf934222792abbcf01e91958439277a311697049e0fd
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 25,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 3.542206287384033,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.3763,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.32,
|
20 |
+
"grad_norm": 0.5754655003547668,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0481,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.48,
|
27 |
+
"grad_norm": 2.7304298877716064,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3178,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.64,
|
34 |
+
"grad_norm": 0.6067225337028503,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1304,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.8,
|
41 |
+
"grad_norm": 2.869699478149414,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.3075,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.96,
|
48 |
+
"grad_norm": 0.8252665400505066,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.052,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.12,
|
55 |
+
"grad_norm": 0.7447052001953125,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1314,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 1.28,
|
62 |
+
"grad_norm": 1.8536393642425537,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.156,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 1.44,
|
69 |
+
"grad_norm": 2.449882984161377,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4599,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 1.6,
|
76 |
+
"grad_norm": 2.2815041542053223,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1967,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.76,
|
83 |
+
"grad_norm": 1.0439902544021606,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0865,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.92,
|
90 |
+
"grad_norm": 0.8892500400543213,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.1329,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 2.0,
|
97 |
+
"step": 25,
|
98 |
+
"total_flos": 2742967366320128.0,
|
99 |
+
"train_loss": 0.20229598999023438,
|
100 |
+
"train_runtime": 129.0449,
|
101 |
+
"train_samples_per_second": 0.775,
|
102 |
+
"train_steps_per_second": 0.194
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"logging_steps": 2,
|
106 |
+
"max_steps": 25,
|
107 |
+
"num_input_tokens_seen": 0,
|
108 |
+
"num_train_epochs": 1,
|
109 |
+
"save_steps": 500,
|
110 |
+
"stateful_callbacks": {
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": false,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 2742967366320128.0,
|
123 |
+
"train_batch_size": 1,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62a56161ae4aea304a7df9a9a113695fb5847d8c18e181bc080678fcc6fb4875
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b4398903d3135bc4481596590877e68395fdc308315b96f238f1c15f0d16e57
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:970b453869a990122c0c8f00dc95e9ff5ad81f43dfaa9885da5faf1fb482cc8d
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b858ad0106605ad21b9dd5c0180ee9eeed29295087cc8068531e7f9819399952
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4057d89e942597802e900271f93eb3596ee5ffd6f4e859a52c304255d17b01d8
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90afaea929330aaaa94280ce0cd71eefce98e1860fc5097ccd03750f0ae8c2b3
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:064b3b483d035f11c9129805ff0c111cf83b048c71e2b9e14c46ba3da0f819da
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66ed624f357ad2a852d69c1eadc605ac44e8b42f8db87c045ce8b8d10a5057c2
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 25,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 2.5665788650512695,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.6774,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.32,
|
20 |
+
"grad_norm": 3.874232053756714,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.4658,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.48,
|
27 |
+
"grad_norm": 2.8483521938323975,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.2574,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.64,
|
34 |
+
"grad_norm": 2.191157579421997,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.1927,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.8,
|
41 |
+
"grad_norm": 3.0909106731414795,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.311,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.96,
|
48 |
+
"grad_norm": 2.030684232711792,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.2952,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.12,
|
55 |
+
"grad_norm": 2.7925920486450195,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3441,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 1.28,
|
62 |
+
"grad_norm": 3.240939140319824,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.3284,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 1.44,
|
69 |
+
"grad_norm": 2.8805136680603027,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.351,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 1.6,
|
76 |
+
"grad_norm": 2.0048928260803223,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.4163,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.76,
|
83 |
+
"grad_norm": 1.862860918045044,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2851,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.92,
|
90 |
+
"grad_norm": 3.4276328086853027,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.3383,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 2.0,
|
97 |
+
"step": 25,
|
98 |
+
"total_flos": 2838752368001024.0,
|
99 |
+
"train_loss": 0.3459229278564453,
|
100 |
+
"train_runtime": 127.6119,
|
101 |
+
"train_samples_per_second": 0.784,
|
102 |
+
"train_steps_per_second": 0.196
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"logging_steps": 2,
|
106 |
+
"max_steps": 25,
|
107 |
+
"num_input_tokens_seen": 0,
|
108 |
+
"num_train_epochs": 1,
|
109 |
+
"save_steps": 500,
|
110 |
+
"stateful_callbacks": {
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": false,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 2838752368001024.0,
|
123 |
+
"train_batch_size": 1,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaa403e2cab3dc6a153a854eafe26b8c044eace3fd625339aaa2f842064ce6fc
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ac8eadce9154f4ccc978ff304d55ad47849b685fa2a8d507d9412b4da249b5b
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc05bcb183cd20f96a0281e831fa50420f07adbcbaca8a204d153a639ea32559
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f444a8ed2f532a74b9e774e41a3f3738217f0c5956d255d807db6847712ae547
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6d0f1358c23df3d76db42b1a746bb4bccaefd9f82304f58330fd74aff529339
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dab3512773e75eaf70c2cc65274792a114aaf002f765de81df59c5ef3ee2cac
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e75fd605342d2ef9e69fe71a7a48af89c32d5940c0b1e0c7fe6ae1cda4426e9
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e15e9678154d5ba223b7e407839330af436f55e9d8a70e19ebd34091df309eb2
|
3 |
+
size 1167511866
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 25,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.16,
|
13 |
+
"grad_norm": 4.487705230712891,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.4972,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.32,
|
20 |
+
"grad_norm": 0.9496861100196838,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.1444,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.48,
|
27 |
+
"grad_norm": 3.0709917545318604,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.4052,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.64,
|
34 |
+
"grad_norm": 1.8949023485183716,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.2446,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.8,
|
41 |
+
"grad_norm": 1.549836277961731,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.1589,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.96,
|
48 |
+
"grad_norm": 2.86405611038208,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.1817,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.12,
|
55 |
+
"grad_norm": 2.9263083934783936,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2236,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 1.28,
|
62 |
+
"grad_norm": 3.8913233280181885,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4171,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 1.44,
|
69 |
+
"grad_norm": 1.5427923202514648,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2518,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 1.6,
|
76 |
+
"grad_norm": 0.7112218141555786,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1182,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 1.76,
|
83 |
+
"grad_norm": 2.249495267868042,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.2611,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.92,
|
90 |
+
"grad_norm": 0.6617293357849121,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0596,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 2.0,
|
97 |
+
"step": 25,
|
98 |
+
"total_flos": 3177992868069376.0,
|
99 |
+
"train_loss": 0.24374237060546874,
|
100 |
+
"train_runtime": 129.5612,
|
101 |
+
"train_samples_per_second": 0.772,
|
102 |
+
"train_steps_per_second": 0.193
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"logging_steps": 2,
|
106 |
+
"max_steps": 25,
|
107 |
+
"num_input_tokens_seen": 0,
|
108 |
+
"num_train_epochs": 1,
|
109 |
+
"save_steps": 500,
|
110 |
+
"stateful_callbacks": {
|
111 |
+
"TrainerControl": {
|
112 |
+
"args": {
|
113 |
+
"should_epoch_stop": false,
|
114 |
+
"should_evaluate": false,
|
115 |
+
"should_log": false,
|
116 |
+
"should_save": false,
|
117 |
+
"should_training_stop": false
|
118 |
+
},
|
119 |
+
"attributes": {}
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"total_flos": 3177992868069376.0,
|
123 |
+
"train_batch_size": 1,
|
124 |
+
"trial_name": null,
|
125 |
+
"trial_params": null
|
126 |
+
}
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbfc914d9fb77a9a27937b96bae1f4c35e85707f3dc2bca9f39e234f6b15da6c
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f71948f039dbd0c495f3488ae535041a78add0a84677408ae64140352e50b7a
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcfc5d301bcfcf1205745180936f62723fae2c47eef4b29122357a98aa6459c7
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0a1d7b998b28a6967c389f2f77276745d8a966dd0fed0fdc88c44477f0b59c2
|
3 |
+
size 1167513110
|
client_states_feddat_bs4_saveoptim_lr2e-5_sc310_4tasks_5rounds_fixitr25_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c56f66437b479eac7767ffb1c7468ce29eb22eefe6042a309c694a7ccfe53099
|
3 |
+
size 1167511866
|