Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete change set.
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
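The 50-file limit cuts the listing short; the remaining per-client checkpoints and trainer states appear only in the raw diff. For reference, a folder like this one is typically pushed with HfApi.upload_folder from huggingface_hub. A minimal sketch, not the exact call used for this commit; repo_id is a placeholder:

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099",
    repo_id="your-username/your-repo",  # placeholder, not the actual repository id
    repo_type="model",                  # assumption; this could also be a dataset repo
)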
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9391214ff1923b83752a8d3951b5d3401a5ec6d0246781854e0201f97f9f82b3
+size 302202238
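Note that each .pth entry in this diff is only a 3-line Git LFS pointer (version, oid, size); the checkpoint tensors themselves live in LFS storage. A minimal sketch of fetching the real object and loading it, assuming the files are ordinary torch.save checkpoints; repo_id is a placeholder:

from huggingface_hub import hf_hub_download
import torch

ckpt_path = hf_hub_download(
    repo_id="your-username/your-repo",  # placeholder
    filename="client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth",
)
state = torch.load(ckpt_path, map_location="cpu")  # download resolves the pointer to the real file
print(type(state))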
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8ff8dd214a728fe9583f1d2f40c92851b977c13963f43257431bd0afcbbad2c
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bfe57805c7c5080f34d0a9b1d11f82f05e58fdce3169d5d4cab80a6b73d0ae
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:616a2c97b60a139f9cc9ddbc270586849cc31afc3bcce85ae5087b5fe4964eb3
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4882a2e3ee217c356fc14775ece3eb957872f6040ed76f9dd5fdf02601715dd
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3cbd61ffa32e5a09cab1b8df46c660a822400be159e93bb759a75e0c8897905
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbf69e6bab55d931c75b4629ca7f94f6d85e67a577f60d56d3ad0356e939e732
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f01537df585cceb4f79ff0535872d97637653cd1913821410649382c6045014b
+size 302201386
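The oid and size recorded in each pointer can be used to verify a downloaded checkpoint. A minimal sketch using the values from the 0_client_model_round7.pth entry above; the local path is an assumption:

import hashlib
import os

path = "0_client_model_round7.pth"  # assumed local copy of the resolved LFS object
expected_oid = "f01537df585cceb4f79ff0535872d97637653cd1913821410649382c6045014b"
expected_size = 302201386

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"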
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 6.59199857711792, "learning_rate": 2e-05, "loss": 0.5286, "step": 2},
+    {"epoch": 0.04, "grad_norm": 4.61942720413208, "learning_rate": 2e-05, "loss": 0.97, "step": 4},
+    {"epoch": 0.06, "grad_norm": 4.521786212921143, "learning_rate": 2e-05, "loss": 0.3505, "step": 6},
+    {"epoch": 0.08, "grad_norm": 0.39112570881843567, "learning_rate": 2e-05, "loss": 0.115, "step": 8},
+    {"epoch": 0.1, "grad_norm": 3.506913900375366, "learning_rate": 2e-05, "loss": 0.669, "step": 10},
+    {"epoch": 0.12, "grad_norm": 7.119588375091553, "learning_rate": 2e-05, "loss": 0.6728, "step": 12},
+    {"epoch": 0.14, "grad_norm": 4.159899711608887, "learning_rate": 2e-05, "loss": 0.4872, "step": 14},
+    {"epoch": 0.16, "grad_norm": 5.473998069763184, "learning_rate": 2e-05, "loss": 1.3448, "step": 16},
+    {"epoch": 0.18, "grad_norm": 9.59962272644043, "learning_rate": 2e-05, "loss": 1.1658, "step": 18},
+    {"epoch": 0.2, "grad_norm": 5.037639617919922, "learning_rate": 2e-05, "loss": 0.437, "step": 20},
+    {"epoch": 0.22, "grad_norm": 3.210973024368286, "learning_rate": 2e-05, "loss": 0.2022, "step": 22},
+    {"epoch": 0.24, "grad_norm": 3.4891958236694336, "learning_rate": 2e-05, "loss": 0.3925, "step": 24},
+    {"epoch": 0.26, "grad_norm": 2.502493381500244, "learning_rate": 2e-05, "loss": 0.5882, "step": 26},
+    {"epoch": 0.28, "grad_norm": 0.41307923197746277, "learning_rate": 2e-05, "loss": 0.6127, "step": 28},
+    {"epoch": 0.3, "grad_norm": 2.3616442680358887, "learning_rate": 2e-05, "loss": 0.6964, "step": 30},
+    {"epoch": 0.32, "grad_norm": 5.551742076873779, "learning_rate": 2e-05, "loss": 0.2612, "step": 32},
+    {"epoch": 0.34, "grad_norm": 4.289650917053223, "learning_rate": 2e-05, "loss": 0.3091, "step": 34},
+    {"epoch": 0.36, "grad_norm": 13.974275588989258, "learning_rate": 2e-05, "loss": 0.8659, "step": 36},
+    {"epoch": 0.38, "grad_norm": 2.4527721405029297, "learning_rate": 2e-05, "loss": 0.7772, "step": 38},
+    {"epoch": 0.4, "grad_norm": 2.5927505493164062, "learning_rate": 2e-05, "loss": 0.162, "step": 40},
+    {"epoch": 0.42, "grad_norm": 0.6561378836631775, "learning_rate": 2e-05, "loss": 0.0605, "step": 42},
+    {"epoch": 0.44, "grad_norm": 4.918743133544922, "learning_rate": 2e-05, "loss": 0.7217, "step": 44},
+    {"epoch": 0.46, "grad_norm": 0.39737483859062195, "learning_rate": 2e-05, "loss": 0.1857, "step": 46},
+    {"epoch": 0.48, "grad_norm": 14.40439224243164, "learning_rate": 2e-05, "loss": 1.4849, "step": 48},
+    {"epoch": 0.5, "grad_norm": 0.6556407809257507, "learning_rate": 2e-05, "loss": 0.1272, "step": 50},
+    {"epoch": 0.52, "grad_norm": 9.867362976074219, "learning_rate": 2e-05, "loss": 0.4252, "step": 52},
+    {"epoch": 0.54, "grad_norm": 2.099479913711548, "learning_rate": 2e-05, "loss": 0.0928, "step": 54},
+    {"epoch": 0.56, "grad_norm": 4.53439474105835, "learning_rate": 2e-05, "loss": 0.3139, "step": 56},
+    {"epoch": 0.58, "grad_norm": 7.238653659820557, "learning_rate": 2e-05, "loss": 1.6104, "step": 58},
+    {"epoch": 0.6, "grad_norm": 1.539478063583374, "learning_rate": 2e-05, "loss": 0.1581, "step": 60},
+    {"epoch": 0.62, "grad_norm": 1.8394383192062378, "learning_rate": 2e-05, "loss": 0.4265, "step": 62},
+    {"epoch": 0.64, "grad_norm": 0.7109262943267822, "learning_rate": 2e-05, "loss": 0.1265, "step": 64},
+    {"epoch": 0.66, "grad_norm": 1.0773893594741821, "learning_rate": 2e-05, "loss": 0.2364, "step": 66},
+    {"epoch": 0.68, "grad_norm": 3.935823917388916, "learning_rate": 2e-05, "loss": 1.0373, "step": 68},
+    {"epoch": 0.7, "grad_norm": 7.714654922485352, "learning_rate": 2e-05, "loss": 0.6007, "step": 70},
+    {"epoch": 0.72, "grad_norm": 8.1398286819458, "learning_rate": 2e-05, "loss": 0.8645, "step": 72},
+    {"epoch": 0.74, "grad_norm": 7.816657066345215, "learning_rate": 2e-05, "loss": 2.3697, "step": 74},
+    {"epoch": 0.76, "grad_norm": 7.676031589508057, "learning_rate": 2e-05, "loss": 0.59, "step": 76},
+    {"epoch": 0.78, "grad_norm": 1.7220346927642822, "learning_rate": 2e-05, "loss": 0.9622, "step": 78},
+    {"epoch": 0.8, "grad_norm": 3.681994915008545, "learning_rate": 2e-05, "loss": 0.4974, "step": 80},
+    {"epoch": 0.82, "grad_norm": 7.639248847961426, "learning_rate": 2e-05, "loss": 0.9375, "step": 82},
+    {"epoch": 0.84, "grad_norm": 9.04971694946289, "learning_rate": 2e-05, "loss": 1.22, "step": 84},
+    {"epoch": 0.86, "grad_norm": 1.4018713235855103, "learning_rate": 2e-05, "loss": 0.0818, "step": 86},
+    {"epoch": 0.88, "grad_norm": 4.88040828704834, "learning_rate": 2e-05, "loss": 0.7108, "step": 88},
+    {"epoch": 0.9, "grad_norm": 2.9837918281555176, "learning_rate": 2e-05, "loss": 0.3657, "step": 90},
+    {"epoch": 0.92, "grad_norm": 1.0587419271469116, "learning_rate": 2e-05, "loss": 0.3112, "step": 92},
+    {"epoch": 0.94, "grad_norm": 9.619867324829102, "learning_rate": 2e-05, "loss": 2.6249, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.8182443380355835, "learning_rate": 2e-05, "loss": 1.052, "step": 96},
+    {"epoch": 0.98, "grad_norm": 5.4298481941223145, "learning_rate": 2e-05, "loss": 0.983, "step": 98},
+    {"epoch": 1.0, "grad_norm": 1.9551576375961304, "learning_rate": 2e-05, "loss": 0.3735, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 2722175404474368.0, "train_loss": 0.6631992340087891, "train_runtime": 215.6578, "train_samples_per_second": 1.855, "train_steps_per_second": 0.464}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 2722175404474368.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
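The trainer_state.json above follows the Hugging Face Trainer state layout: log_history holds one entry per logging step (epoch, grad_norm, learning_rate, loss, step) plus a final summary entry with train_loss and runtime statistics. A minimal sketch of summarizing one client's state, assuming a local copy of the file:

import json

with open("0_trainer_state.json") as f:
    state = json.load(f)

losses = [entry["loss"] for entry in state["log_history"] if "loss" in entry]
summary = state["log_history"][-1]
print(len(losses), "logged steps")
print("mean logged loss:", sum(losses) / len(losses))
print("reported train_loss:", summary.get("train_loss"))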
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88cec14609ad32c290c19fe99cc2626c1573cbc85f114434a313145999b97e84
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df187ae799f263db8566f5385dc79e9c2623b1d6957e1928f94a77e61051ea6c
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ae4134c8cf9bd49bd9e9d46343957346c41793b1be9d466e80cfecfcea6f24a
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5289d21311312be0fde45820e0a508ba5cbfd66bb88d64b7ec96bbf4065de637
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1da2662d2df70b5cf7a9cbac9949d8be681b8e616a3e9f8bd8f573d3ae40e3
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf4c5a1502c71922c0c95fafe0bced2fd2386a6dcb782663f81d80fe38eae27
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11cabf8f804c392f7578109c0fc6aeb86b7f26cd200fd331d5756dd441870339
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:058f762da3f1f0d383fcdfdbaa9c44ff0a75249cec5d1d061055f5c8bc46789d
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 0.849377453327179, "learning_rate": 2e-05, "loss": 0.0279, "step": 2},
+    {"epoch": 0.04, "grad_norm": 0.02175198495388031, "learning_rate": 2e-05, "loss": 0.0578, "step": 4},
+    {"epoch": 0.06, "grad_norm": 0.5898083448410034, "learning_rate": 2e-05, "loss": 0.0414, "step": 6},
+    {"epoch": 0.08, "grad_norm": 0.41922903060913086, "learning_rate": 2e-05, "loss": 0.0256, "step": 8},
+    {"epoch": 0.1, "grad_norm": 0.0018654951127246022, "learning_rate": 2e-05, "loss": 0.0004, "step": 10},
+    {"epoch": 0.12, "grad_norm": 0.004117058124393225, "learning_rate": 2e-05, "loss": 0.1077, "step": 12},
+    {"epoch": 0.14, "grad_norm": 0.047437746077775955, "learning_rate": 2e-05, "loss": 0.0009, "step": 14},
+    {"epoch": 0.16, "grad_norm": 1.518896460533142, "learning_rate": 2e-05, "loss": 0.349, "step": 16},
+    {"epoch": 0.18, "grad_norm": 0.045308228582143784, "learning_rate": 2e-05, "loss": 0.0889, "step": 18},
+    {"epoch": 0.2, "grad_norm": 0.11829675734043121, "learning_rate": 2e-05, "loss": 0.0026, "step": 20},
+    {"epoch": 0.22, "grad_norm": 0.005238677840679884, "learning_rate": 2e-05, "loss": 0.4929, "step": 22},
+    {"epoch": 0.24, "grad_norm": 0.2110355943441391, "learning_rate": 2e-05, "loss": 0.008, "step": 24},
+    {"epoch": 0.26, "grad_norm": 1.4381576776504517, "learning_rate": 2e-05, "loss": 0.0896, "step": 26},
+    {"epoch": 0.28, "grad_norm": 16.31382942199707, "learning_rate": 2e-05, "loss": 0.2398, "step": 28},
+    {"epoch": 0.3, "grad_norm": 0.05612191930413246, "learning_rate": 2e-05, "loss": 0.0021, "step": 30},
+    {"epoch": 0.32, "grad_norm": 0.22438876330852509, "learning_rate": 2e-05, "loss": 0.0076, "step": 32},
+    {"epoch": 0.34, "grad_norm": 1.866287350654602, "learning_rate": 2e-05, "loss": 0.0595, "step": 34},
+    {"epoch": 0.36, "grad_norm": 0.045663584023714066, "learning_rate": 2e-05, "loss": 0.0009, "step": 36},
+    {"epoch": 0.38, "grad_norm": 0.2734965682029724, "learning_rate": 2e-05, "loss": 0.0132, "step": 38},
+    {"epoch": 0.4, "grad_norm": 0.5363508462905884, "learning_rate": 2e-05, "loss": 0.0329, "step": 40},
+    {"epoch": 0.42, "grad_norm": 0.382310688495636, "learning_rate": 2e-05, "loss": 0.0192, "step": 42},
+    {"epoch": 0.44, "grad_norm": 8.9566011428833, "learning_rate": 2e-05, "loss": 0.3838, "step": 44},
+    {"epoch": 0.46, "grad_norm": 1.7171825170516968, "learning_rate": 2e-05, "loss": 0.0176, "step": 46},
+    {"epoch": 0.48, "grad_norm": 1.6939023733139038, "learning_rate": 2e-05, "loss": 0.0231, "step": 48},
+    {"epoch": 0.5, "grad_norm": 0.0025089113041758537, "learning_rate": 2e-05, "loss": 0.0026, "step": 50},
+    {"epoch": 0.52, "grad_norm": 0.06536436825990677, "learning_rate": 2e-05, "loss": 0.2801, "step": 52},
+    {"epoch": 0.54, "grad_norm": 0.20245826244354248, "learning_rate": 2e-05, "loss": 0.0073, "step": 54},
+    {"epoch": 0.56, "grad_norm": 0.018265284597873688, "learning_rate": 2e-05, "loss": 0.0028, "step": 56},
+    {"epoch": 0.58, "grad_norm": 0.13041305541992188, "learning_rate": 2e-05, "loss": 0.1354, "step": 58},
+    {"epoch": 0.6, "grad_norm": 0.010534190572798252, "learning_rate": 2e-05, "loss": 1.0962, "step": 60},
+    {"epoch": 0.62, "grad_norm": 0.2692118287086487, "learning_rate": 2e-05, "loss": 0.0085, "step": 62},
+    {"epoch": 0.64, "grad_norm": 0.038933683186769485, "learning_rate": 2e-05, "loss": 0.0019, "step": 64},
+    {"epoch": 0.66, "grad_norm": 0.018930355086922646, "learning_rate": 2e-05, "loss": 0.0017, "step": 66},
+    {"epoch": 0.68, "grad_norm": 0.7976124286651611, "learning_rate": 2e-05, "loss": 0.0334, "step": 68},
+    {"epoch": 0.7, "grad_norm": 0.2943456470966339, "learning_rate": 2e-05, "loss": 0.0089, "step": 70},
+    {"epoch": 0.72, "grad_norm": 0.12224985659122467, "learning_rate": 2e-05, "loss": 0.0067, "step": 72},
+    {"epoch": 0.74, "grad_norm": 0.3579569458961487, "learning_rate": 2e-05, "loss": 0.0677, "step": 74},
+    {"epoch": 0.76, "grad_norm": 0.02683216519653797, "learning_rate": 2e-05, "loss": 0.0016, "step": 76},
+    {"epoch": 0.78, "grad_norm": 3.3340158462524414, "learning_rate": 2e-05, "loss": 0.2447, "step": 78},
+    {"epoch": 0.8, "grad_norm": 0.009762264788150787, "learning_rate": 2e-05, "loss": 0.0083, "step": 80},
+    {"epoch": 0.82, "grad_norm": 0.024613451212644577, "learning_rate": 2e-05, "loss": 0.0116, "step": 82},
+    {"epoch": 0.84, "grad_norm": 0.009135313332080841, "learning_rate": 2e-05, "loss": 0.0008, "step": 84},
+    {"epoch": 0.86, "grad_norm": 0.2523867189884186, "learning_rate": 2e-05, "loss": 0.4089, "step": 86},
+    {"epoch": 0.88, "grad_norm": 0.32133349776268005, "learning_rate": 2e-05, "loss": 0.0093, "step": 88},
+    {"epoch": 0.9, "grad_norm": 0.09947264194488525, "learning_rate": 2e-05, "loss": 0.0039, "step": 90},
+    {"epoch": 0.92, "grad_norm": 0.12009107321500778, "learning_rate": 2e-05, "loss": 0.0069, "step": 92},
+    {"epoch": 0.94, "grad_norm": 0.007163307163864374, "learning_rate": 2e-05, "loss": 0.0081, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.4114590883255005, "learning_rate": 2e-05, "loss": 0.066, "step": 96},
+    {"epoch": 0.98, "grad_norm": 0.04830743372440338, "learning_rate": 2e-05, "loss": 0.0046, "step": 98},
+    {"epoch": 1.0, "grad_norm": 0.04225878044962883, "learning_rate": 2e-05, "loss": 0.0019, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 2743212791824384.0, "train_loss": 0.0904453244805336, "train_runtime": 221.9331, "train_samples_per_second": 1.802, "train_steps_per_second": 0.451}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 2743212791824384.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
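Because every client writes the same state layout, the final summary entries can be compared across clients. A minimal sketch, assuming local copies of the trainer states listed above (clients 0-4):

import json

for cid in range(5):
    with open(f"{cid}_trainer_state.json") as f:
        summary = json.load(f)["log_history"][-1]
    print(cid, summary["train_loss"], summary["train_runtime"])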
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9be8024aa2eece99882c85b8046475baa83ad6283113525243ad4265e9bf7576
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce08ca256ac2358ea58f9bf1b6e1e689255df91a4f52dbeb33536a4dcc474533
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c586d85bfc4ed525de5cc0bcd8e9bb1def55fc41b70014035e84fbbdfc08625b
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d608fe1ce0b07b208d805224fc9aa2685c3edeca580c162648006f6425393528
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2072f36c89ed2416f35894c760f95c059d0947e40d2c851e6559e50ac1c610dd
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99426839dd4d5f67e775bb10a4c52b99e22e14e2de2c8b8d162e9059a49513cc
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8586e15be816067f89397fd74efde646b7f477c27e24148b917e3bfa10b0d3e
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd9c04cdc272cfff8bddd92cc280c2385a2797d1a2f3a196861109207940d86
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 1.3100306987762451, "learning_rate": 2e-05, "loss": 0.3864, "step": 2},
+    {"epoch": 0.04, "grad_norm": 4.013766288757324, "learning_rate": 2e-05, "loss": 0.9063, "step": 4},
+    {"epoch": 0.06, "grad_norm": 2.3978607654571533, "learning_rate": 2e-05, "loss": 1.1349, "step": 6},
+    {"epoch": 0.08, "grad_norm": 4.109842300415039, "learning_rate": 2e-05, "loss": 0.825, "step": 8},
+    {"epoch": 0.1, "grad_norm": 2.427557945251465, "learning_rate": 2e-05, "loss": 0.5722, "step": 10},
+    {"epoch": 0.12, "grad_norm": 4.734681606292725, "learning_rate": 2e-05, "loss": 1.0887, "step": 12},
+    {"epoch": 0.14, "grad_norm": 0.10447093099355698, "learning_rate": 2e-05, "loss": 0.5304, "step": 14},
+    {"epoch": 0.16, "grad_norm": 2.675856828689575, "learning_rate": 2e-05, "loss": 0.9177, "step": 16},
+    {"epoch": 0.18, "grad_norm": 1.6946303844451904, "learning_rate": 2e-05, "loss": 0.4756, "step": 18},
+    {"epoch": 0.2, "grad_norm": 1.9363477230072021, "learning_rate": 2e-05, "loss": 0.7104, "step": 20},
+    {"epoch": 0.22, "grad_norm": 0.7683027982711792, "learning_rate": 2e-05, "loss": 0.1338, "step": 22},
+    {"epoch": 0.24, "grad_norm": 5.035861492156982, "learning_rate": 2e-05, "loss": 0.9689, "step": 24},
+    {"epoch": 0.26, "grad_norm": 3.8543343544006348, "learning_rate": 2e-05, "loss": 0.523, "step": 26},
+    {"epoch": 0.28, "grad_norm": 3.667877435684204, "learning_rate": 2e-05, "loss": 0.3602, "step": 28},
+    {"epoch": 0.3, "grad_norm": 3.0984058380126953, "learning_rate": 2e-05, "loss": 0.5042, "step": 30},
+    {"epoch": 0.32, "grad_norm": 1.3304017782211304, "learning_rate": 2e-05, "loss": 0.2943, "step": 32},
+    {"epoch": 0.34, "grad_norm": 2.7662222385406494, "learning_rate": 2e-05, "loss": 0.5895, "step": 34},
+    {"epoch": 0.36, "grad_norm": 0.6082299947738647, "learning_rate": 2e-05, "loss": 0.1146, "step": 36},
+    {"epoch": 0.38, "grad_norm": 2.9511778354644775, "learning_rate": 2e-05, "loss": 0.7031, "step": 38},
+    {"epoch": 0.4, "grad_norm": 3.04605770111084, "learning_rate": 2e-05, "loss": 0.3958, "step": 40},
+    {"epoch": 0.42, "grad_norm": 3.8611412048339844, "learning_rate": 2e-05, "loss": 0.8662, "step": 42},
+    {"epoch": 0.44, "grad_norm": 4.399799823760986, "learning_rate": 2e-05, "loss": 0.287, "step": 44},
+    {"epoch": 0.46, "grad_norm": 2.2531864643096924, "learning_rate": 2e-05, "loss": 0.3218, "step": 46},
+    {"epoch": 0.48, "grad_norm": 1.4663535356521606, "learning_rate": 2e-05, "loss": 1.0107, "step": 48},
+    {"epoch": 0.5, "grad_norm": 1.7521519660949707, "learning_rate": 2e-05, "loss": 0.124, "step": 50},
+    {"epoch": 0.52, "grad_norm": 8.896435737609863, "learning_rate": 2e-05, "loss": 0.8135, "step": 52},
+    {"epoch": 0.54, "grad_norm": 2.5376131534576416, "learning_rate": 2e-05, "loss": 0.511, "step": 54},
+    {"epoch": 0.56, "grad_norm": 6.814590930938721, "learning_rate": 2e-05, "loss": 1.0585, "step": 56},
+    {"epoch": 0.58, "grad_norm": 2.1924495697021484, "learning_rate": 2e-05, "loss": 0.1282, "step": 58},
+    {"epoch": 0.6, "grad_norm": 0.17645888030529022, "learning_rate": 2e-05, "loss": 0.0467, "step": 60},
+    {"epoch": 0.62, "grad_norm": 2.022798538208008, "learning_rate": 2e-05, "loss": 0.7755, "step": 62},
+    {"epoch": 0.64, "grad_norm": 3.513547897338867, "learning_rate": 2e-05, "loss": 1.0105, "step": 64},
+    {"epoch": 0.66, "grad_norm": 2.7813096046447754, "learning_rate": 2e-05, "loss": 0.3313, "step": 66},
+    {"epoch": 0.68, "grad_norm": 14.315655708312988, "learning_rate": 2e-05, "loss": 1.5528, "step": 68},
+    {"epoch": 0.7, "grad_norm": 3.795628309249878, "learning_rate": 2e-05, "loss": 0.5101, "step": 70},
+    {"epoch": 0.72, "grad_norm": 1.2288278341293335, "learning_rate": 2e-05, "loss": 1.374, "step": 72},
+    {"epoch": 0.74, "grad_norm": 4.386161804199219, "learning_rate": 2e-05, "loss": 0.1973, "step": 74},
+    {"epoch": 0.76, "grad_norm": 3.3581271171569824, "learning_rate": 2e-05, "loss": 0.6263, "step": 76},
+    {"epoch": 0.78, "grad_norm": 1.225866436958313, "learning_rate": 2e-05, "loss": 0.1825, "step": 78},
+    {"epoch": 0.8, "grad_norm": 7.104559898376465, "learning_rate": 2e-05, "loss": 1.1801, "step": 80},
+    {"epoch": 0.82, "grad_norm": 13.105498313903809, "learning_rate": 2e-05, "loss": 1.7541, "step": 82},
+    {"epoch": 0.84, "grad_norm": 0.2902798056602478, "learning_rate": 2e-05, "loss": 0.0271, "step": 84},
+    {"epoch": 0.86, "grad_norm": 4.769597053527832, "learning_rate": 2e-05, "loss": 0.5574, "step": 86},
+    {"epoch": 0.88, "grad_norm": 3.8983893394470215, "learning_rate": 2e-05, "loss": 1.3119, "step": 88},
+    {"epoch": 0.9, "grad_norm": 3.6488382816314697, "learning_rate": 2e-05, "loss": 1.0188, "step": 90},
+    {"epoch": 0.92, "grad_norm": 0.29925721883773804, "learning_rate": 2e-05, "loss": 0.5366, "step": 92},
+    {"epoch": 0.94, "grad_norm": 3.2376351356506348, "learning_rate": 2e-05, "loss": 0.6949, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.63726806640625, "learning_rate": 2e-05, "loss": 0.2721, "step": 96},
+    {"epoch": 0.98, "grad_norm": 5.418401718139648, "learning_rate": 2e-05, "loss": 1.4491, "step": 98},
+    {"epoch": 1.0, "grad_norm": 2.0275309085845947, "learning_rate": 2e-05, "loss": 0.5931, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 5059805727162368.0, "train_loss": 0.6651638150215149, "train_runtime": 277.116, "train_samples_per_second": 1.443, "train_steps_per_second": 0.361}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 5059805727162368.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
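To mirror the whole folder locally instead of fetching files one at a time, snapshot_download can be restricted to this directory. A minimal sketch; repo_id is a placeholder:

from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="your-username/your-repo",  # placeholder
    allow_patterns=["client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/*"],
)
print(local_dir)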
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57fec58061a92ab1b1bf4a4c6012fc1397a46c90b77813f7cdd58f7cb3fe2ed3
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3401ff987c93089fb123edc132faa3c3adae7422977093002adb8f8d9588d91
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac62e5e6fe65b335575f7ed7e9b1e3e428debec1fca2e10d9dc2123665b121b
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6901f0e6d410fbd830c0624606aa6c27c103266b25c9e6b95ab4a281cc6bfc6
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaaf2162f9a52ec84d0a5306af230ee2e2524d8b3ab4f9007c0eb79f88a34f95
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa5e605e3d74782eebb64e65d6103a2c51abb8a7defe4b8c0cf0e4b3c764165a
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2765067dfd32667733c18170c3f622d661e26844689a324651bb49e31c0531eb
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a34222b6b3c027b0a0631a8b22b43c25f1371bfd82e5daf1543cf536e3c8723
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "grad_norm": 4.537669658660889, "learning_rate": 2e-05, "loss": 1.832, "step": 2 },
    { "epoch": 0.04, "grad_norm": 1.7357145547866821, "learning_rate": 2e-05, "loss": 0.6956, "step": 4 },
    { "epoch": 0.06, "grad_norm": 4.840099334716797, "learning_rate": 2e-05, "loss": 1.1172, "step": 6 },
    { "epoch": 0.08, "grad_norm": 6.74650239944458, "learning_rate": 2e-05, "loss": 1.2902, "step": 8 },
    { "epoch": 0.1, "grad_norm": 5.235438346862793, "learning_rate": 2e-05, "loss": 0.8775, "step": 10 },
    { "epoch": 0.12, "grad_norm": 7.184224605560303, "learning_rate": 2e-05, "loss": 2.0581, "step": 12 },
    { "epoch": 0.14, "grad_norm": 12.11132526397705, "learning_rate": 2e-05, "loss": 2.1586, "step": 14 },
    { "epoch": 0.16, "grad_norm": 5.101541042327881, "learning_rate": 2e-05, "loss": 1.0109, "step": 16 },
    { "epoch": 0.18, "grad_norm": 4.515460014343262, "learning_rate": 2e-05, "loss": 0.7775, "step": 18 },
    { "epoch": 0.2, "grad_norm": 1.4222784042358398, "learning_rate": 2e-05, "loss": 0.935, "step": 20 },
    { "epoch": 0.22, "grad_norm": 4.021170139312744, "learning_rate": 2e-05, "loss": 0.8757, "step": 22 },
    { "epoch": 0.24, "grad_norm": 2.8799550533294678, "learning_rate": 2e-05, "loss": 0.809, "step": 24 },
    { "epoch": 0.26, "grad_norm": 0.9323534369468689, "learning_rate": 2e-05, "loss": 0.4597, "step": 26 },
    { "epoch": 0.28, "grad_norm": 2.9174280166625977, "learning_rate": 2e-05, "loss": 0.3155, "step": 28 },
    { "epoch": 0.3, "grad_norm": 3.026035785675049, "learning_rate": 2e-05, "loss": 0.6032, "step": 30 },
    { "epoch": 0.32, "grad_norm": 8.995691299438477, "learning_rate": 2e-05, "loss": 0.9903, "step": 32 },
    { "epoch": 0.34, "grad_norm": 1.847169041633606, "learning_rate": 2e-05, "loss": 1.3635, "step": 34 },
    { "epoch": 0.36, "grad_norm": 9.795331001281738, "learning_rate": 2e-05, "loss": 2.0643, "step": 36 },
    { "epoch": 0.38, "grad_norm": 1.7796157598495483, "learning_rate": 2e-05, "loss": 0.4105, "step": 38 },
    { "epoch": 0.4, "grad_norm": 4.427263259887695, "learning_rate": 2e-05, "loss": 0.8952, "step": 40 },
    { "epoch": 0.42, "grad_norm": 15.724753379821777, "learning_rate": 2e-05, "loss": 2.4695, "step": 42 },
    { "epoch": 0.44, "grad_norm": 6.148226261138916, "learning_rate": 2e-05, "loss": 1.2556, "step": 44 },
    { "epoch": 0.46, "grad_norm": 1.515079140663147, "learning_rate": 2e-05, "loss": 0.4941, "step": 46 },
    { "epoch": 0.48, "grad_norm": 1.410451889038086, "learning_rate": 2e-05, "loss": 0.2573, "step": 48 },
    { "epoch": 0.5, "grad_norm": 4.407503604888916, "learning_rate": 2e-05, "loss": 0.8896, "step": 50 },
    { "epoch": 0.52, "grad_norm": 8.208944320678711, "learning_rate": 2e-05, "loss": 2.3883, "step": 52 },
    { "epoch": 0.54, "grad_norm": 1.5512627363204956, "learning_rate": 2e-05, "loss": 0.6002, "step": 54 },
    { "epoch": 0.56, "grad_norm": 5.655065059661865, "learning_rate": 2e-05, "loss": 0.9759, "step": 56 },
    { "epoch": 0.58, "grad_norm": 2.2757630348205566, "learning_rate": 2e-05, "loss": 0.481, "step": 58 },
    { "epoch": 0.6, "grad_norm": 9.758919715881348, "learning_rate": 2e-05, "loss": 1.1768, "step": 60 },
    { "epoch": 0.62, "grad_norm": 4.106739521026611, "learning_rate": 2e-05, "loss": 0.7397, "step": 62 },
    { "epoch": 0.64, "grad_norm": 4.5539231300354, "learning_rate": 2e-05, "loss": 0.7899, "step": 64 },
    { "epoch": 0.66, "grad_norm": 3.6534392833709717, "learning_rate": 2e-05, "loss": 0.6942, "step": 66 },
    { "epoch": 0.68, "grad_norm": 4.095523357391357, "learning_rate": 2e-05, "loss": 1.1672, "step": 68 },
    { "epoch": 0.7, "grad_norm": 6.486148834228516, "learning_rate": 2e-05, "loss": 1.2393, "step": 70 },
    { "epoch": 0.72, "grad_norm": 8.842077255249023, "learning_rate": 2e-05, "loss": 1.7434, "step": 72 },
    { "epoch": 0.74, "grad_norm": 8.89773178100586, "learning_rate": 2e-05, "loss": 1.2772, "step": 74 },
    { "epoch": 0.76, "grad_norm": 18.22632598876953, "learning_rate": 2e-05, "loss": 3.3775, "step": 76 },
    { "epoch": 0.78, "grad_norm": 2.2898244857788086, "learning_rate": 2e-05, "loss": 0.7762, "step": 78 },
    { "epoch": 0.8, "grad_norm": 7.303566932678223, "learning_rate": 2e-05, "loss": 0.6705, "step": 80 },
    { "epoch": 0.82, "grad_norm": 4.615819454193115, "learning_rate": 2e-05, "loss": 0.4729, "step": 82 },
    { "epoch": 0.84, "grad_norm": 3.424020290374756, "learning_rate": 2e-05, "loss": 0.6039, "step": 84 },
    { "epoch": 0.86, "grad_norm": 2.209282636642456, "learning_rate": 2e-05, "loss": 2.1192, "step": 86 },
    { "epoch": 0.88, "grad_norm": 1.9839006662368774, "learning_rate": 2e-05, "loss": 0.9141, "step": 88 },
    { "epoch": 0.9, "grad_norm": 1.8658138513565063, "learning_rate": 2e-05, "loss": 0.9538, "step": 90 },
    { "epoch": 0.92, "grad_norm": 5.851518630981445, "learning_rate": 2e-05, "loss": 0.7171, "step": 92 },
    { "epoch": 0.94, "grad_norm": 3.3957180976867676, "learning_rate": 2e-05, "loss": 0.8128, "step": 94 },
    { "epoch": 0.96, "grad_norm": 1.1212002038955688, "learning_rate": 2e-05, "loss": 0.3877, "step": 96 },
    { "epoch": 0.98, "grad_norm": 3.886902093887329, "learning_rate": 2e-05, "loss": 0.8257, "step": 98 },
    { "epoch": 1.0, "grad_norm": 3.7381937503814697, "learning_rate": 2e-05, "loss": 0.7051, "step": 100 },
    { "epoch": 1.0, "step": 100, "total_flos": 2779357248487424.0, "train_loss": 1.0703019142150878, "train_runtime": 215.987, "train_samples_per_second": 1.852, "train_steps_per_second": 0.463 }
  ],
  "logging_steps": 2,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2779357248487424.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
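The trainer-state files in this upload follow the standard Hugging Face `Trainer` checkpoint layout, so the per-step loss curve and the end-of-run summary can be read back with the `json` module alone. A minimal sketch, assuming a local copy of the folder (the specific filename below is just one of the `*_trainer_state.json` files listed in this upload):

```python
import json

# Path assumes the folder has been downloaded locally; any *_trainer_state.json in it has the same layout.
path = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json"
)

with open(path) as f:
    state = json.load(f)

# Per-logging-step records (every `logging_steps` = 2 optimizer steps) carry a "loss" key;
# the final record instead carries the aggregate train_* statistics.
records = [r for r in state["log_history"] if "loss" in r]
steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
print(f"{len(records)} logged points up to step {state['global_step']}")

summary = state["log_history"][-1]
print("train_loss:", summary["train_loss"], "runtime (s):", summary["train_runtime"])
```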
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69d4760f0c1959606186ed7e2dbc53f991bd01e88e3a73b20618c9bcbd214386
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4447cf5a3e3ba4fc20c85906454383543302b54a45eb6230e015b355a7c620c
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5c4e788867c89e82a865f5c263bfc27372f9ad1c131ee5f73ba1257cb1358df0
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4132f75da66ae6d2d1d4e9d126dcf9907beda51bc8637a5d299f80f47e1dc1b
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83ae2b5822fb41da2f51fa255253ef5fe5e0742b6941f2e9d52243a28d00e24f
size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea8bb07c77a2cfd707f77600756cadf906298b5c948248212f3d5d1787fded51
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4d08d3691c5ba154f8ed2e0f222d7e52e43fe806519ae3751d1bec29a8d4105
size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e3655617efa0261b43cf077e8d338e5d7409043547ee972bf9bb851282182c3
size 487635186
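The `*_client_model_round*.pth` entries above and below are Git LFS pointer files; the actual ~487 MB client checkpoints live in LFS storage. A minimal sketch of fetching and inspecting one of them, assuming this upload sits in a Hugging Face repo (the repo id and repo type below are placeholders, and `torch.load` is assumed to apply only because the files use the `.pth` extension):

```python
import torch
from huggingface_hub import hf_hub_download

REPO_ID = "your-org/your-repo"  # placeholder: the repo this upload belongs to
FOLDER = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099"
)

# Resolves the LFS pointer and downloads the real checkpoint into the local cache.
local_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=f"{FOLDER}/4_client_model_round10.pth",
    repo_type="model",  # assumption: change to "dataset" if the upload is a dataset repo
)

# Load on CPU and peek at the stored object without assuming its exact structure.
obj = torch.load(local_path, map_location="cpu")
if isinstance(obj, dict):
    for name, value in list(obj.items())[:10]:
        shape = tuple(value.shape) if hasattr(value, "shape") else type(value).__name__
        print(name, shape)
```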
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "grad_norm": 3.9793167114257812, "learning_rate": 2e-05, "loss": 0.9603, "step": 2 },
    { "epoch": 0.04, "grad_norm": 2.5188374519348145, "learning_rate": 2e-05, "loss": 0.8116, "step": 4 },
    { "epoch": 0.06, "grad_norm": 1.3143935203552246, "learning_rate": 2e-05, "loss": 0.6426, "step": 6 },
    { "epoch": 0.08, "grad_norm": 1.7295637130737305, "learning_rate": 2e-05, "loss": 0.7914, "step": 8 },
    { "epoch": 0.1, "grad_norm": 0.7666940689086914, "learning_rate": 2e-05, "loss": 0.5176, "step": 10 },
    { "epoch": 0.12, "grad_norm": 1.7987310886383057, "learning_rate": 2e-05, "loss": 0.2534, "step": 12 },
    { "epoch": 0.14, "grad_norm": 2.515049695968628, "learning_rate": 2e-05, "loss": 0.5228, "step": 14 },
    { "epoch": 0.16, "grad_norm": 4.360015869140625, "learning_rate": 2e-05, "loss": 1.2806, "step": 16 },
    { "epoch": 0.18, "grad_norm": 1.8536200523376465, "learning_rate": 2e-05, "loss": 0.4718, "step": 18 },
    { "epoch": 0.2, "grad_norm": 2.595823287963867, "learning_rate": 2e-05, "loss": 0.5098, "step": 20 },
    { "epoch": 0.22, "grad_norm": 3.5519330501556396, "learning_rate": 2e-05, "loss": 1.5173, "step": 22 },
    { "epoch": 0.24, "grad_norm": 1.3497133255004883, "learning_rate": 2e-05, "loss": 0.9349, "step": 24 },
    { "epoch": 0.26, "grad_norm": 1.930624008178711, "learning_rate": 2e-05, "loss": 0.4916, "step": 26 },
    { "epoch": 0.28, "grad_norm": 3.4734158515930176, "learning_rate": 2e-05, "loss": 1.0893, "step": 28 },
    { "epoch": 0.3, "grad_norm": 2.4782814979553223, "learning_rate": 2e-05, "loss": 0.8819, "step": 30 },
    { "epoch": 0.32, "grad_norm": 3.521637439727783, "learning_rate": 2e-05, "loss": 0.9765, "step": 32 },
    { "epoch": 0.34, "grad_norm": 1.3706670999526978, "learning_rate": 2e-05, "loss": 0.4721, "step": 34 },
    { "epoch": 0.36, "grad_norm": 5.157536506652832, "learning_rate": 2e-05, "loss": 0.672, "step": 36 },
    { "epoch": 0.38, "grad_norm": 1.5482473373413086, "learning_rate": 2e-05, "loss": 0.3766, "step": 38 },
    { "epoch": 0.4, "grad_norm": 5.12284517288208, "learning_rate": 2e-05, "loss": 1.2043, "step": 40 },
    { "epoch": 0.42, "grad_norm": 2.4150655269622803, "learning_rate": 2e-05, "loss": 0.9744, "step": 42 },
    { "epoch": 0.44, "grad_norm": 2.1841928958892822, "learning_rate": 2e-05, "loss": 0.5246, "step": 44 },
    { "epoch": 0.46, "grad_norm": 5.618422985076904, "learning_rate": 2e-05, "loss": 1.2388, "step": 46 },
    { "epoch": 0.48, "grad_norm": 2.524454116821289, "learning_rate": 2e-05, "loss": 0.5944, "step": 48 },
    { "epoch": 0.5, "grad_norm": 1.7722740173339844, "learning_rate": 2e-05, "loss": 0.4012, "step": 50 },
    { "epoch": 0.52, "grad_norm": 4.978190898895264, "learning_rate": 2e-05, "loss": 1.5566, "step": 52 },
    { "epoch": 0.54, "grad_norm": 3.1553709506988525, "learning_rate": 2e-05, "loss": 0.945, "step": 54 },
    { "epoch": 0.56, "grad_norm": 0.8509775400161743, "learning_rate": 2e-05, "loss": 0.1817, "step": 56 },
    { "epoch": 0.58, "grad_norm": 3.637312173843384, "learning_rate": 2e-05, "loss": 0.7817, "step": 58 },
    { "epoch": 0.6, "grad_norm": 5.024761199951172, "learning_rate": 2e-05, "loss": 1.0443, "step": 60 },
    { "epoch": 0.62, "grad_norm": 1.6964753866195679, "learning_rate": 2e-05, "loss": 1.0813, "step": 62 },
    { "epoch": 0.64, "grad_norm": 2.6202216148376465, "learning_rate": 2e-05, "loss": 0.8394, "step": 64 },
    { "epoch": 0.66, "grad_norm": 2.0826215744018555, "learning_rate": 2e-05, "loss": 0.6085, "step": 66 },
    { "epoch": 0.68, "grad_norm": 1.6744186878204346, "learning_rate": 2e-05, "loss": 0.5914, "step": 68 },
    { "epoch": 0.7, "grad_norm": 3.6541945934295654, "learning_rate": 2e-05, "loss": 0.8268, "step": 70 },
    { "epoch": 0.72, "grad_norm": 2.257369041442871, "learning_rate": 2e-05, "loss": 0.5257, "step": 72 },
    { "epoch": 0.74, "grad_norm": 0.468746542930603, "learning_rate": 2e-05, "loss": 0.5201, "step": 74 },
    { "epoch": 0.76, "grad_norm": 2.3300929069519043, "learning_rate": 2e-05, "loss": 1.035, "step": 76 },
    { "epoch": 0.78, "grad_norm": 3.127511739730835, "learning_rate": 2e-05, "loss": 0.8523, "step": 78 },
    { "epoch": 0.8, "grad_norm": 2.3533546924591064, "learning_rate": 2e-05, "loss": 0.3409, "step": 80 },
    { "epoch": 0.82, "grad_norm": 2.2223410606384277, "learning_rate": 2e-05, "loss": 0.5418, "step": 82 },
    { "epoch": 0.84, "grad_norm": 2.9352030754089355, "learning_rate": 2e-05, "loss": 0.7653, "step": 84 },
    { "epoch": 0.86, "grad_norm": 0.8046193718910217, "learning_rate": 2e-05, "loss": 0.3562, "step": 86 },
    { "epoch": 0.88, "grad_norm": 1.5213556289672852, "learning_rate": 2e-05, "loss": 0.484, "step": 88 },
    { "epoch": 0.9, "grad_norm": 2.6822187900543213, "learning_rate": 2e-05, "loss": 0.8862, "step": 90 },
    { "epoch": 0.92, "grad_norm": 5.5709662437438965, "learning_rate": 2e-05, "loss": 0.9033, "step": 92 },
    { "epoch": 0.94, "grad_norm": 4.302905082702637, "learning_rate": 2e-05, "loss": 1.0037, "step": 94 },
    { "epoch": 0.96, "grad_norm": 2.346940279006958, "learning_rate": 2e-05, "loss": 0.4615, "step": 96 },
    { "epoch": 0.98, "grad_norm": 1.4154908657073975, "learning_rate": 2e-05, "loss": 0.2501, "step": 98 },
    { "epoch": 1.0, "grad_norm": 7.068972587585449, "learning_rate": 2e-05, "loss": 1.4809, "step": 100 },
    { "epoch": 1.0, "step": 100, "total_flos": 5838906972962816.0, "train_loss": 0.7595100402832031, "train_runtime": 275.7767, "train_samples_per_second": 1.45, "train_steps_per_second": 0.363 }
  ],
  "logging_steps": 2,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5838906972962816.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
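Because each client writes the same trainer-state layout, the end-of-run statistics (the last `log_history` record plus `total_flos`) can be compared across clients in a few lines; a sketch under the same local-copy assumption as above:

```python
import glob
import json

FOLDER = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099"
)

for path in sorted(glob.glob(f"{FOLDER}/*_trainer_state.json")):
    with open(path) as f:
        state = json.load(f)
    summary = state["log_history"][-1]  # final record holds the aggregate training stats
    print(
        f"{path}: train_loss={summary['train_loss']:.4f} "
        f"runtime={summary['train_runtime']:.1f}s flos={state['total_flos']:.3e}"
    )
```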
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7061a373da978d284035594b21fa90359b7f63246265529bf3bbd4424d47ab77
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea9f5a3d5256c4d387533a530ca53a00edf9a3dd5b539d359ef244972f191740
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0d2247c87168cd25007195226e5a2d0cc8da9ff1ad46d1e494b8a8a64bc353b
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd9f2bd53bd05c8e603b94af226a34333ecaf4dc93ab7e920e7ef6ffed5709e6
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa4f85fb161c9eb53205e83989ae34fdafca370bf153f995e1e6f4ac63320fdf
size 487635186