Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48b96c362a78aed601019971f00d6027bed72a085f801c5708c469b30b56e704
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffa113a087044d45e9458ac9c0cc467fc5d4efc7adf19f478aa37bcbf0caaf2f
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e235f913c8f73283f7d46940c3d7efa2db6ba791511a72d27267d2ce1e562533
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38483e00ef8f3a3423e44729285077bc1efbc42c6112450c0be7069f15fde5fc
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:424f036eb3473b0625a9b07031fccf681a4aa443d3d85c2dee0a124fc093d62e
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb30d25ae19dd6ca1f01a3f066d39b79c10c0ae3937a2722a15f790a8acc3da
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16e8b9983821b40d2bccba0cb8aceacdc83d0f24b2a9d503b06bbd0e29d536c0
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce653e9bb0f893fc5b3c40b624e4375f73e384bfee5a88fcfe1d8838f649f6a2
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 1.9091740846633911,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.2221,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 3.158586263656616,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.0034,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 0.46037667989730835,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.0698,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 4.0920891761779785,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.6852,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.10336245596408844,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0561,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 2.5557003021240234,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.5174,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 1.9013806581497192,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.3884,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 0.5343479514122009,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.0633,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 1.3988330364227295,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.4331,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 0.7816677093505859,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.1989,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 1.7695223093032837,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.3803,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 2.075623035430908,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.6594,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.132662296295166,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3909,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 1.9386407136917114,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.9071,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 1.1776388883590698,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1846,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 2.3658692836761475,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.2869,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 1.8401161432266235,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.4268,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 6.151878356933594,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.8957,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 1.4062762260437012,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.1458,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 1.788042426109314,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.1687,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 3.1902451515197754,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.6988,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 4.431184768676758,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.4197,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 0.8136569857597351,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.0892,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 0.2823968529701233,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3095,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 2.880483627319336,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.2591,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 5.056119918823242,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.8428,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 0.07207974791526794,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.0128,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 1.1768360137939453,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.1406,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 3.5694682598114014,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.5909,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 0.23515310883522034,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.325,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 0.7991934418678284,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.2351,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 0.023807095363736153,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.0073,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.7518260478973389,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.0999,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 2.0427582263946533,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.176,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 2.179276466369629,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.139,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 0.9427316188812256,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.0631,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 4.460165500640869,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.643,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 3.386460542678833,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.632,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 2.405658483505249,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.2599,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 4.817083358764648,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.834,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 5.088555812835693,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.4744,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 0.05794494226574898,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.6645,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 0.06005644053220749,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.3662,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 0.3105262219905853,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.374,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.043837547302246,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.2253,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 3.3620216846466064,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.0345,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 0.1824847310781479,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.1119,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 0.09524671733379364,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.1297,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 2866814929338368.0,
|
351 |
+
"train_loss": 0.4281441053164374,
|
352 |
+
"train_runtime": 417.1615,
|
353 |
+
"train_samples_per_second": 0.93,
|
354 |
+
"train_steps_per_second": 0.233
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 2866814929338368.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf5b1b07a962367241a2790e3b47841d377d0a5859a24e0bd572bbb2e9a5dad4
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcaa8b85b90be439bd3bf93fa1b50be8a1a524c0e2551727713f22186123d1f0
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad9f7eab0ddc66d1c63eaf3d04cce2d870464e591c10a0a0cd59c04943163477
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a74dcff81385ce7d01fd715bfe9cd9115994a33b4f2116a5a42c2e3b9691c0b7
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a7215205419c641a43021b684e80c528adf84563ea906d6c2e68c6843e62d71
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ebdc8a3d437a1ef847589521c74c09d29530143a00ceb9bef8a458f47d6c12c
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45bb1653c987aae96fe205a4517a078a0ac54e4b545c86cea875b9dabae7b576
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0793485bc61d7c2e0c75e7ca0f596a805ee2682aaf4bbb6109607da5e979984
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 0.00990249216556549,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.0343,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 0.24752184748649597,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.0285,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 0.11016274988651276,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.1125,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 0.015819694846868515,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.0193,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.0025153865572065115,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.0022,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 0.018475506454706192,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.002,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 5.484030723571777,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.5337,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 0.01962272636592388,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.1908,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 0.02493014559149742,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.0014,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 0.09554272145032883,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.71,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 0.007511932868510485,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.0032,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 0.6068776845932007,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.0584,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 0.019293755292892456,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.0174,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 0.006605098024010658,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.0035,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 0.38141685724258423,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.4474,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 0.03302409499883652,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.0047,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 6.83150577545166,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.324,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 0.10726940631866455,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.0058,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 0.44090333580970764,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.0431,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 0.19889701902866364,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.7427,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 1.034143090248108,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.0863,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 0.22705499827861786,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.357,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 0.7536527514457703,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.1098,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 0.056017301976680756,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.0064,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 0.006744038313627243,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.0006,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 0.9016902446746826,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.1382,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 0.11894352734088898,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.0104,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 0.39221423864364624,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.032,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 0.09858262538909912,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.049,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 0.005559508688747883,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.1089,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 1.6431413888931274,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.1877,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 0.004421388264745474,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.0127,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 0.002455125330016017,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.0007,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 2.288174629211426,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.593,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 0.5138911604881287,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.7277,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 0.2915093004703522,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.0794,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 0.012635215185582638,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.0644,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 0.5793916583061218,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.2267,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 2.189505100250244,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.6962,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 0.004708629101514816,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.0011,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 0.007471214048564434,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.0018,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 0.014228110201656818,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.0012,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 0.01875714771449566,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.0231,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 0.010902749374508858,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.0009,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 0.6271221041679382,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.0795,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 1.3592969179153442,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.106,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 0.4871022403240204,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.0463,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 0.0031653214246034622,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.0005,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 2888162800041984.0,
|
351 |
+
"train_loss": 0.1450096553133935,
|
352 |
+
"train_runtime": 415.6208,
|
353 |
+
"train_samples_per_second": 0.934,
|
354 |
+
"train_steps_per_second": 0.233
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 2888162800041984.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:117c5018b1144fbe01a8a0b3e5b5f28be996c6cd1ced5dd5ebae894b7b20764f
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46882a8e42fbde28008f5736f9a52d0589cd87b83849c4fd9c2719a7737d7caf
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0198020029a280ceab5c502fcc1630094eccd304142cbe991b7716d5e76909
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bb825c00a750f5b116888f6e06e8a0f1899b7080a2228e4f85b242dabe7b805
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0285e24aa92adaeb50c6942d77c9baad30255eac5a6072995e1689b161fae85e
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc759a1821037662e3bc31ea98126c55b6efe62a25eff727fe0d32a32c1ecc96
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a25b0ef58db4a2b67f05037fa400effaa9f3be59bae025b818883e74667a446d
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1e176a28f5e8d10fdc2c78c18da653d7d6816ea6950bedf534d204526cd4a1e
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 1.143397331237793,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.5443,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 1.2255475521087646,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.9587,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.8370898962020874,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.182,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 0.24408237636089325,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.2572,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.2356729656457901,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.3236,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 0.12980803847312927,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.359,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 0.34007528424263,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.1969,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 0.8254105448722839,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.7104,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 0.665677011013031,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.2014,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 0.9692237973213196,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.6621,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 0.43489089608192444,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.6234,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 2.537994861602783,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.9041,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.01339590549469,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3681,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 0.37009069323539734,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.4056,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 0.044184956699609756,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.1972,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.3115153312683105,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3582,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 0.7034705281257629,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.2604,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 0.5956244468688965,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.9219,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 1.4682765007019043,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.7098,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 2.516416311264038,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.1477,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 2.4257423877716064,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.0164,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 0.3192073404788971,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.3794,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 2.1369898319244385,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.5324,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 0.22400034964084625,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.7004,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 1.9823366403579712,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.9041,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 1.1424986124038696,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.2122,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 1.1508723497390747,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.2551,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 1.047311544418335,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.3889,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 0.28297775983810425,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.2793,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 0.2760946452617645,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.4845,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 3.4843385219573975,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.725,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 0.1845403015613556,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.2025,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.3841183185577393,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.6707,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 0.6219921708106995,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.3708,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 1.2532652616500854,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.6669,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 1.8839783668518066,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.4824,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 1.2260621786117554,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.8137,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.1083022356033325,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.9023,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 0.7099221348762512,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.3594,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 1.9942265748977661,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.4561,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 3.795022964477539,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 1.0328,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 0.9263180494308472,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.6605,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 1.9401521682739258,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.3343,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 1.5276206731796265,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.6242,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 2.310457229614258,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.854,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 1.974561333656311,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.8861,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 2.190361499786377,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.0974,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 1.6531275510787964,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.7672,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 5270224836231168.0,
|
351 |
+
"train_loss": 0.6526356726577601,
|
352 |
+
"train_runtime": 527.3005,
|
353 |
+
"train_samples_per_second": 0.736,
|
354 |
+
"train_steps_per_second": 0.184
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 5270224836231168.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7f38637be273d636ae4bf42e45ac88bd68d613b9db39c4011385158e57cfc82
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15d4a001460770d6133cfe80cb6b1118d692eac140ab64b0b5dbe135a59035d9
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79870074898564ea9937eef2cb84f972f35f15ef60ff83d4d5e9221c910c1649
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5072ca902f0faeb5daf4f968b0859f0fd9c37293147851a3bf30d319ee022741
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b72835c7dcc77117ee65d0b8a56db8bcef201336153949791586c6cf628dbb
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5da7ae67d07bffcb1ca91e721216e9bb57862eaf28b3fd5621c55e61375a5e0
|
3 |
+
size 606590838
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0ddcc5243cf0cbb1c7689c4e85a09a645f7f24a043fbffea59babe8a120ff12
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b38eb64e42265ae8776bbe390aeea5b07ff1fa63c9ecc1aeae053eae53259c4c
|
3 |
+
size 606588810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 0.8621726036071777,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.784,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 0.5997222661972046,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.6114,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 3.3685293197631836,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.8636,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 1.9444336891174316,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.7377,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 3.8980281352996826,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 2.0884,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 3.079683542251587,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.1494,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 3.4189367294311523,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.9661,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 1.382783055305481,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.0355,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 5.828529357910156,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 1.5671,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 2.367591142654419,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.8609,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 1.9332449436187744,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.9275,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 1.3459978103637695,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.8898,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.5858796834945679,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.3414,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 1.9047499895095825,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 3.4836,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 3.298762321472168,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.4902,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.7490943670272827,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.759,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 0.9601039290428162,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5406,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 1.696427345275879,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.5591,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 4.497976779937744,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.48,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 1.4220675230026245,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.9096,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 0.5704763531684875,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.0739,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 3.798910140991211,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.8083,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 0.6530976891517639,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.3467,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 2.5659232139587402,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.2449,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 1.7021549940109253,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.991,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 2.904054880142212,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.9027,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 0.40748944878578186,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.3164,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 1.6402074098587036,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.4264,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 3.439208984375,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 2.0284,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 2.8119544982910156,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.5881,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 1.5405571460723877,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.9022,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 4.333232402801514,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 1.2758,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.6036462783813477,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.7932,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 0.596666157245636,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.7625,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 2.8570806980133057,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.7929,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 2.321223020553589,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 1.2169,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 4.413544178009033,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.3534,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.6249202489852905,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.6887,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 3.379758834838867,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 2.4109,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 1.7662537097930908,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.8549,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 2.5819923877716064,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.6496,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 2.989832878112793,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 1.1886,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 1.8118072748184204,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.149,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 1.4145994186401367,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.8608,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.3375070095062256,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.9514,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 5.697417259216309,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.387,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 3.187375545501709,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.7515,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 4.534388542175293,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 1.2819,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 2928335550152704.0,
|
351 |
+
"train_loss": 1.1240325613120168,
|
352 |
+
"train_runtime": 413.8678,
|
353 |
+
"train_samples_per_second": 0.937,
|
354 |
+
"train_steps_per_second": 0.234
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 2928335550152704.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ede28fdd2cede10ccf106490cbc2da115e8236fc942f0fc8573f1f3127100ea
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a41cc09a59ab65452ba45d80dcde938e20a6d1c74313d8d819237825ecc5c129
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0231971052f8cadaf5af44030be3e4a0d1119e6cd551875513b30a19e2ba16ae
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d03f1b6ddfea3320be70db4eec5aaea28b3cb748e1b15df3c1c0241f193f5de4
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c540936a1e1cfa6e1e8ac3eab7ab2ad6ef4730a3d071d311b3d2e6888cfda4f
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b455529478842b55f61e1722446272f3df530a1f55a1a44c5e36df6879739386
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4c4f909649374c7493c39988abbb17b7a6ec2b86497510258894a8ca3398f0e
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a6fd9c6fe317a9688edc469a71b86ddcb698e23fbe087692d2d4d5f8bda91a5
|
3 |
+
size 978818810
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 1.9402724504470825,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.5838,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 2.4250800609588623,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.2304,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.5540610551834106,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.3604,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 0.7538492679595947,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.6599,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.9257169365882874,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.6932,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 0.5299786329269409,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.5977,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 1.8260445594787598,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.7848,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 1.0459568500518799,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.423,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 0.7857280373573303,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.5644,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 1.1462950706481934,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.446,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 2.205085039138794,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 1.4026,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 0.7716929316520691,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.312,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.7518846988677979,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.9049,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 0.8513244390487671,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.7163,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 1.708772897720337,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.848,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.2805875539779663,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.8176,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 0.7299635410308838,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.3226,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 0.9453825354576111,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.4467,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 0.4364389181137085,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.4499,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 0.4604692757129669,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.7041,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 0.42262014746665955,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.5528,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 0.2717072069644928,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2196,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 1.1014915704727173,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4718,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 0.48280566930770874,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.1461,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 1.16571843624115,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.7124,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 0.14320193231105804,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.7823,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 1.8387656211853027,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.3319,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 0.9942914247512817,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 0.4071,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 1.0944980382919312,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.617,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 1.0097112655639648,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.2987,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 1.1440106630325317,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.7794,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 0.9374860525131226,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.737,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.6988123655319214,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.7242,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 1.1150574684143066,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.2495,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 1.318420171737671,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.0947,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 0.7494012713432312,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.4376,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 0.5291847586631775,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.1724,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 0.8617143630981445,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.552,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 0.4218270480632782,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.1633,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 1.3452715873718262,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.8789,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 1.967393398284912,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.4333,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 1.4400347471237183,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.9299,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 0.30658483505249023,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.0109,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 0.16921880841255188,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.2108,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 0.4752196669578552,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.4654,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 2.074636936187744,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.8971,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 1.7106537818908691,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.0818,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 0.6808597445487976,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.6208,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 6081635527163904.0,
|
351 |
+
"train_loss": 0.6636996711652303,
|
352 |
+
"train_runtime": 526.4308,
|
353 |
+
"train_samples_per_second": 0.737,
|
354 |
+
"train_steps_per_second": 0.184
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 6081635527163904.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e1ca40ac7a86695907ed50ab92b3322e850ce3c6b20d88160df8fea655233b1
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd30b73f84f40cfc88d63e9fc388b5f992dd60422583b803e4554908460b5e99
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7daa10dfe387d89f37538ddaf13b254637fcf67fec290c83210cf4e18f936ec8
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84e1d84adca8d5f3beff7b2330eaacb77d10fc1382a4a63da515bd4d30c4402e
|
3 |
+
size 978821398
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ace73a466487a0e201e84be96ae47e72b535f683a7e17c5ddf448191f327469e
|
3 |
+
size 978818810
|