Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_trainer_state.json +378 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac6e3c20c9ce2b3bd22a4ec405cfe0e9c4823d415ddd9d0a1432c7f7a7821ab6
|
3 |
+
size 369838470
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9faab1abc5585336f9c5217cbf9229feae7f158d14775ec6b0465ecdcd6b72c
|
3 |
+
size 369838470
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b911d7298ebdb70e1147918b61c660749e1a0eb1428cf862812392dc3e79ee0
|
3 |
+
size 369838470
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99c914f9fec69d46834a0247afa323ef2d204838bfc58478b2b70b659c6c7837
|
3 |
+
size 369838470
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43cd654d47bea4a0664e185ffcb76f37acdef0f55d2203b478aee301074cec65
|
3 |
+
size 369837282
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:482a35fcc3ebc3cf030093ca637eeb0b4f1a908aa49998263fd26a4f21738ec7
|
3 |
+
size 369838470
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abab9ede2af77a1461e2d522138e4f5e882bed4af399616ec0dd66577cfdc753
|
3 |
+
size 369837282
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f38813bfe5d6e1f323702ba4809bf6db6af166cb14a2717a8eb9f1ef775cdc1
|
3 |
+
size 369837282
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 1.2302707433700562,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.6424,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 1.6121233701705933,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.0052,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.4117467403411865,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 1.005,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 3.630777359008789,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 1.6888,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.9374276995658875,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.7314,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 0.9001209139823914,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6316,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 0.801906168460846,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.9834,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 1.1132997274398804,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.3636,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 1.1756536960601807,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.3419,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 2.3688271045684814,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.8968,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 8.31472110748291,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.8141,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 1.499045491218567,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5582,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 2.175128936767578,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.2834,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 1.3025474548339844,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.898,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 1.6166173219680786,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.2673,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 0.9590756297111511,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 1.3175,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 1.4882543087005615,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.6644,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 1.2179570198059082,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.7493,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 1.3930878639221191,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.7374,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 2.189868688583374,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.9137,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 1.1282020807266235,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.6859,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 1.5559532642364502,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.528,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 2.0104541778564453,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9586,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 4.215638637542725,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.0792,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 3.2594962120056152,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.2841,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 0.7323676943778992,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.0893,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 1.5159685611724854,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.5317,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 4.473369598388672,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 1.2966,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 5.313100337982178,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.911,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 2.3561482429504395,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.8127,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 2.3545758724212646,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.7019,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 7.38244104385376,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.86,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.3104522228240967,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.3289,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 2.5590906143188477,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.8105,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 4.937007427215576,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.1369,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 6.499209880828857,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.8859,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 6.662985801696777,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.5822,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.9580082893371582,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 1.1448,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 1.697409749031067,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.8685,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 9.267684936523438,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 2.1197,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 0.6946778297424316,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.6537,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 4.137977600097656,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 1.5048,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 2.1147782802581787,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.9791,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 2.045584201812744,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.5647,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.7340083122253418,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 1.0669,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 2.2718918323516846,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 1.3045,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 2.3538527488708496,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.7428,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 1.2346324920654297,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.5767,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 5656240185147392.0,
|
351 |
+
"train_loss": 0.9453033073661253,
|
352 |
+
"train_runtime": 235.4567,
|
353 |
+
"train_samples_per_second": 1.648,
|
354 |
+
"train_steps_per_second": 0.412
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 5656240185147392.0,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4eb57f4da4d9af2c8f88eec2c68cce411eb4ace1cd7b4b8c6cfcb2932e44e1af
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:599c1c0f44c2450b50611cbf10979f3176c7fb5c97b9994e162e9afbe6589e1e
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61bc0585554474b7a6c7a46de7c089e6d367f8d84a3f2082dda108068613b90a
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a364994d7ccff9361b49b861cbbf4336eb34dd9e46c7921c637fa2c7a9532a6a
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6d174170815e6f16f125a4b43e29ade7e900a8e7534df2954323e02bae74339
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:085787736ea4d59641527424baeb286c6851230bafe4e1c92069427d0e2fe907
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4319cfba2f0947e6c39dae27f1f6e85fe0967d560cf49a26499af35aeba4b27
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3091e8b368a7015e0b43b5db1be0a826707a5eb9ae318b117c885ee90397875a
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 0.8781272172927856,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.8515,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 4.043600082397461,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 1.9452,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.0472830533981323,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.6496,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 1.3015058040618896,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.5628,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 0.2546836733818054,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 0.5908,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 2.3884270191192627,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 1.1725,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 1.4381201267242432,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.827,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 1.5257450342178345,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 1.3457,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 1.5005308389663696,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 1.1501,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 1.9017939567565918,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.8049,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 1.4530093669891357,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.8718,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 0.9238858819007874,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 1.1926,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.5560870170593262,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.8745,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 3.8156371116638184,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 1.4656,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 2.274550199508667,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.2095,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 2.0850868225097656,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.546,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 1.1981834173202515,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.5663,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 5.242424011230469,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.7796,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 1.4221488237380981,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.5759,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 1.140813946723938,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.8574,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 2.981257200241089,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.6415,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 2.198453187942505,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.9304,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 0.601679801940918,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.4595,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 1.553512692451477,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.9711,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 2.1878437995910645,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.7843,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 1.1170060634613037,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.3049,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 3.8993043899536133,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.1269,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 3.7384300231933594,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 1.3041,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 4.556199550628662,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.5492,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 1.3752775192260742,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.0508,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 5.783763885498047,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 1.7019,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 1.1487605571746826,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.7288,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 2.0315375328063965,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.0151,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 1.0512689352035522,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 1.0146,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 2.663738250732422,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.2133,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 2.6531965732574463,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.8632,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 2.310678243637085,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.6255,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.3333702087402344,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.8103,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 2.0063538551330566,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 1.2381,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 0.6322288513183594,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.584,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 1.8914169073104858,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 1.0259,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 2.7294955253601074,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.9706,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 3.710840940475464,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.2648,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 1.3264449834823608,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.5923,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.280088186264038,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.6826,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 0.8928223252296448,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.5134,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 4.573293209075928,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.0831,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 2.4279978275299072,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 1.0474,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 1.3528534647570432e+16,
|
351 |
+
"train_loss": 1.0155205480831186,
|
352 |
+
"train_runtime": 413.9788,
|
353 |
+
"train_samples_per_second": 0.937,
|
354 |
+
"train_steps_per_second": 0.234
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 1.3528534647570432e+16,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e41781055739cec33a2f49d21003d32c53009bfc48cb66d73c87eaf6b9828f6
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:470dcb87433ec035468bec134fe6c4d50e8a8f668c2c833a26ea926f11dbf049
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1640ce7785ab8e8e7902397e1908cbfc1011e1d5154f0e12e336affd18cd2cf
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3338370d88d25ad5a15f7a9160a5b9c280ec60de889a0caceb9ece4f4b6237a4
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41bf4f1754b8137517a3a1f7c53e997d179bcddef4baab07d74c51e28d2ad3df
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7857f8fb11d2880bf457dd12ed9ca204bf7a5eef168bdcd8d1a2fc6735c0b71
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc09018e2a00817778162596d3f9610e71a8a13be41391723e59f72c0c2f0b8a
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0652fdc81a19c58e14361714085bcb4172d17ffa522ffa2967dd41e5f98fe357
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 2.2005271911621094,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.5353,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 2.829289436340332,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.5587,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 2.785386562347412,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 2.5733,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 1.3352482318878174,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.3653,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 3.0877790451049805,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.2587,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 0.9194437265396118,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.605,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 1.778803825378418,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.4535,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 2.219306230545044,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.733,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 3.0522265434265137,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.8598,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 1.7349960803985596,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 0.6239,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 2.6168980598449707,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.6977,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 0.6964139938354492,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5198,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 2.2305409908294678,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.6052,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 2.8250887393951416,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 1.6726,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 1.5621683597564697,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 0.6449,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.7774029970169067,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.3363,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 1.4394720792770386,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.1173,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 0.6797069311141968,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 0.6117,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 0.983581006526947,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 0.2687,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 2.0993242263793945,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.9805,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 11.079167366027832,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 1.5928,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 2.051191806793213,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.5769,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 3.372947931289673,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 1.0517,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 2.3665897846221924,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 1.9332,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 2.36680269241333,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.1949,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 0.713635265827179,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.3461,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 3.4997355937957764,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.4795,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 6.203523635864258,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 1.5504,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 4.314687252044678,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.7256,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 4.574437618255615,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.6266,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 3.7229034900665283,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.861,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 2.443660020828247,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.6011,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.8502126932144165,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.4416,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 0.3321700990200043,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.1995,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 4.566008567810059,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.9874,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 2.5475733280181885,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.7815,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 1.5172197818756104,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.2529,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 5.836910724639893,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 1.6699,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 2.813737392425537,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.9105,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 3.7902021408081055,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.9401,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 5.073143005371094,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.6575,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 0.7925168871879578,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.2744,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 1.4323312044143677,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.7463,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 1.573714256286621,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.4023,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 2.421898126602173,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.4877,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 2.748384714126587,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.759,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 3.4596893787384033,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.4826,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 4.076857089996338,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 1.2546,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 1.5335589188468736e+16,
|
351 |
+
"train_loss": 0.8459088949813056,
|
352 |
+
"train_runtime": 452.4686,
|
353 |
+
"train_samples_per_second": 0.858,
|
354 |
+
"train_steps_per_second": 0.214
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 1.5335589188468736e+16,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c72a9298e90c4a5026adeee75d51a6787039d4083de3ba15eb18a73fcf09a51
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e253250a37cf6cae5c4dcf7987efc8ebd71581af9371fd266869a69e9d0da10
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:356b3acc7076436eb47f2e906faaa291cf6379c294c0851f177314d7b7c4e0c9
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3d7b8b4e24255f6052e1820a7a5cf87dfb846bff90e6c974c30d5767be5af55
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aafa27a73b5f8aee55adc7d15f0e86119bd2d2862a123507854e0d1bf6829e4
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1af39dc03463a4ca2ff118e308fe0798e6a515c344a632c8f0d4f0b0e5acad8
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:544fa4d1ceead9f40862e2891a05492c06dd059aa8a539bc151fd1380003f039
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:385be361a93fce6f9feb2df719a26a9cc1c43ee1e5e4edfb0184de4ff6f6b13b
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 2.431029796600342,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 1.0939,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 0.8722438812255859,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.9552,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.1362252235412598,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.6326,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 0.799323320388794,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.9866,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 1.1111527681350708,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 1.0202,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 1.7144205570220947,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.9727,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 0.9399757981300354,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.468,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 0.7109354138374329,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.4818,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 1.847976565361023,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 1.0419,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 2.183365821838379,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.4441,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 0.9391213059425354,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.4735,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 0.10097850859165192,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 0.5644,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 0.9321213960647583,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 1.1182,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 1.5129534006118774,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.9117,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 1.2945747375488281,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.3779,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.1420668363571167,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 0.8766,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 1.9086567163467407,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 0.8392,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 1.51237952709198,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.2022,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 1.0951957702636719,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.1245,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 4.724721431732178,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 1.3685,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 1.6724278926849365,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.7964,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 0.3896200954914093,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 0.2359,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 0.6976099014282227,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.5979,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 1.113963007926941,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.7774,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 1.4955312013626099,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 0.999,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 1.4446187019348145,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 0.3719,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 1.4187099933624268,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 0.8836,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 0.9385492205619812,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 1.1007,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 1.1563595533370972,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 0.4014,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 1.551350474357605,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 0.6619,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 1.1922292709350586,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.5347,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 0.8667466640472412,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.7007,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.0623116493225098,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 0.5061,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 0.04590483754873276,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.3285,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 1.00609290599823,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 0.3451,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 1.5188184976577759,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 0.6001,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 0.9316257238388062,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 0.6777,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.65702223777771,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.7453,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 1.313071370124817,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.4345,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 0.6534919738769531,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.5735,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 0.32963377237319946,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.1275,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 0.9862601161003113,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.7405,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 4.502978801727295,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 0.8153,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 1.4814475774765015,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 0.8128,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.8578946590423584,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.6065,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 1.3685534000396729,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.4277,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 1.3430529832839966,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 0.7456,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 1.8663636445999146,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 0.4437,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 2.0809997453623296e+16,
|
351 |
+
"train_loss": 0.7489266002301088,
|
352 |
+
"train_runtime": 524.2746,
|
353 |
+
"train_samples_per_second": 0.74,
|
354 |
+
"train_steps_per_second": 0.185
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 2.0809997453623296e+16,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bc7c7424c2df6afa2b5bae3653afdaaad3158911d70013ce6f4f9dcbb554ba0
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c2424d1e1a49a8f746c7d30ac918d879bbbcae4726650c61cc996ef7b1fe0a8
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dc9d7da842da98082cf9c2c2fca77b3a7bd5877e8c31ae27e665ca686e55176
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9eca367e33592f4a5962682369f04fb7f40d715e6eaa4faa4d68bee854a8f08a
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aca4fb55c66b3552c4255c8d6323f4e27baabe1cec491853761c693625301042
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c042ed992d8bc718a05f2f3ff9467b2ccb0dd25243c0e9c40dcfefb933fd495
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf9f9bbe51dc5519112adbd5447500bf5fef833ee61df5554c859782348e78b5
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32ff0b5147bbb9f48b017adae43e8b62e0bc9529077070959e498cc382f5470e
|
3 |
+
size 794706058
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 97,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.020618556701030927,
|
13 |
+
"grad_norm": 0.3599570095539093,
|
14 |
+
"learning_rate": 2e-05,
|
15 |
+
"loss": 0.2675,
|
16 |
+
"step": 2
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.041237113402061855,
|
20 |
+
"grad_norm": 1.1936863660812378,
|
21 |
+
"learning_rate": 2e-05,
|
22 |
+
"loss": 0.9035,
|
23 |
+
"step": 4
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.061855670103092786,
|
27 |
+
"grad_norm": 1.3883532285690308,
|
28 |
+
"learning_rate": 2e-05,
|
29 |
+
"loss": 0.6571,
|
30 |
+
"step": 6
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.08247422680412371,
|
34 |
+
"grad_norm": 2.7608344554901123,
|
35 |
+
"learning_rate": 2e-05,
|
36 |
+
"loss": 0.4303,
|
37 |
+
"step": 8
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.10309278350515463,
|
41 |
+
"grad_norm": 5.371406078338623,
|
42 |
+
"learning_rate": 2e-05,
|
43 |
+
"loss": 2.1826,
|
44 |
+
"step": 10
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.12371134020618557,
|
48 |
+
"grad_norm": 2.3308653831481934,
|
49 |
+
"learning_rate": 2e-05,
|
50 |
+
"loss": 0.6657,
|
51 |
+
"step": 12
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.14432989690721648,
|
55 |
+
"grad_norm": 0.042726580053567886,
|
56 |
+
"learning_rate": 2e-05,
|
57 |
+
"loss": 0.2714,
|
58 |
+
"step": 14
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.16494845360824742,
|
62 |
+
"grad_norm": 1.8078577518463135,
|
63 |
+
"learning_rate": 2e-05,
|
64 |
+
"loss": 0.893,
|
65 |
+
"step": 16
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.18556701030927836,
|
69 |
+
"grad_norm": 1.4273505210876465,
|
70 |
+
"learning_rate": 2e-05,
|
71 |
+
"loss": 0.9524,
|
72 |
+
"step": 18
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.20618556701030927,
|
76 |
+
"grad_norm": 2.442458152770996,
|
77 |
+
"learning_rate": 2e-05,
|
78 |
+
"loss": 1.1022,
|
79 |
+
"step": 20
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.2268041237113402,
|
83 |
+
"grad_norm": 0.48077863454818726,
|
84 |
+
"learning_rate": 2e-05,
|
85 |
+
"loss": 0.7807,
|
86 |
+
"step": 22
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.24742268041237114,
|
90 |
+
"grad_norm": 1.495241641998291,
|
91 |
+
"learning_rate": 2e-05,
|
92 |
+
"loss": 1.4534,
|
93 |
+
"step": 24
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.26804123711340205,
|
97 |
+
"grad_norm": 1.614429235458374,
|
98 |
+
"learning_rate": 2e-05,
|
99 |
+
"loss": 0.9892,
|
100 |
+
"step": 26
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.28865979381443296,
|
104 |
+
"grad_norm": 1.6893081665039062,
|
105 |
+
"learning_rate": 2e-05,
|
106 |
+
"loss": 0.5477,
|
107 |
+
"step": 28
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.30927835051546393,
|
111 |
+
"grad_norm": 2.3914456367492676,
|
112 |
+
"learning_rate": 2e-05,
|
113 |
+
"loss": 1.8496,
|
114 |
+
"step": 30
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.32989690721649484,
|
118 |
+
"grad_norm": 1.6967483758926392,
|
119 |
+
"learning_rate": 2e-05,
|
120 |
+
"loss": 1.0244,
|
121 |
+
"step": 32
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.35051546391752575,
|
125 |
+
"grad_norm": 3.545107364654541,
|
126 |
+
"learning_rate": 2e-05,
|
127 |
+
"loss": 1.1615,
|
128 |
+
"step": 34
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.3711340206185567,
|
132 |
+
"grad_norm": 1.4850765466690063,
|
133 |
+
"learning_rate": 2e-05,
|
134 |
+
"loss": 1.1958,
|
135 |
+
"step": 36
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.3917525773195876,
|
139 |
+
"grad_norm": 5.655995845794678,
|
140 |
+
"learning_rate": 2e-05,
|
141 |
+
"loss": 1.1793,
|
142 |
+
"step": 38
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.41237113402061853,
|
146 |
+
"grad_norm": 2.4371092319488525,
|
147 |
+
"learning_rate": 2e-05,
|
148 |
+
"loss": 0.8878,
|
149 |
+
"step": 40
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.4329896907216495,
|
153 |
+
"grad_norm": 1.0520563125610352,
|
154 |
+
"learning_rate": 2e-05,
|
155 |
+
"loss": 0.431,
|
156 |
+
"step": 42
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.4536082474226804,
|
160 |
+
"grad_norm": 1.6813929080963135,
|
161 |
+
"learning_rate": 2e-05,
|
162 |
+
"loss": 1.4381,
|
163 |
+
"step": 44
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.4742268041237113,
|
167 |
+
"grad_norm": 1.9094098806381226,
|
168 |
+
"learning_rate": 2e-05,
|
169 |
+
"loss": 0.9001,
|
170 |
+
"step": 46
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.4948453608247423,
|
174 |
+
"grad_norm": 1.8786391019821167,
|
175 |
+
"learning_rate": 2e-05,
|
176 |
+
"loss": 0.3896,
|
177 |
+
"step": 48
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.5154639175257731,
|
181 |
+
"grad_norm": 4.317810535430908,
|
182 |
+
"learning_rate": 2e-05,
|
183 |
+
"loss": 1.2674,
|
184 |
+
"step": 50
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.5360824742268041,
|
188 |
+
"grad_norm": 1.5854238271713257,
|
189 |
+
"learning_rate": 2e-05,
|
190 |
+
"loss": 1.2163,
|
191 |
+
"step": 52
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.5567010309278351,
|
195 |
+
"grad_norm": 4.147825717926025,
|
196 |
+
"learning_rate": 2e-05,
|
197 |
+
"loss": 1.2768,
|
198 |
+
"step": 54
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.5773195876288659,
|
202 |
+
"grad_norm": 1.2678698301315308,
|
203 |
+
"learning_rate": 2e-05,
|
204 |
+
"loss": 1.0032,
|
205 |
+
"step": 56
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.5979381443298969,
|
209 |
+
"grad_norm": 2.2713613510131836,
|
210 |
+
"learning_rate": 2e-05,
|
211 |
+
"loss": 1.7791,
|
212 |
+
"step": 58
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.6185567010309279,
|
216 |
+
"grad_norm": 4.81284236907959,
|
217 |
+
"learning_rate": 2e-05,
|
218 |
+
"loss": 1.5097,
|
219 |
+
"step": 60
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.6391752577319587,
|
223 |
+
"grad_norm": 1.9140543937683105,
|
224 |
+
"learning_rate": 2e-05,
|
225 |
+
"loss": 0.7074,
|
226 |
+
"step": 62
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.6597938144329897,
|
230 |
+
"grad_norm": 2.5636491775512695,
|
231 |
+
"learning_rate": 2e-05,
|
232 |
+
"loss": 0.8297,
|
233 |
+
"step": 64
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.6804123711340206,
|
237 |
+
"grad_norm": 1.8870617151260376,
|
238 |
+
"learning_rate": 2e-05,
|
239 |
+
"loss": 1.2714,
|
240 |
+
"step": 66
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.7010309278350515,
|
244 |
+
"grad_norm": 1.2541420459747314,
|
245 |
+
"learning_rate": 2e-05,
|
246 |
+
"loss": 0.8347,
|
247 |
+
"step": 68
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.7216494845360825,
|
251 |
+
"grad_norm": 1.135138988494873,
|
252 |
+
"learning_rate": 2e-05,
|
253 |
+
"loss": 1.5559,
|
254 |
+
"step": 70
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.7422680412371134,
|
258 |
+
"grad_norm": 0.9652976989746094,
|
259 |
+
"learning_rate": 2e-05,
|
260 |
+
"loss": 1.1556,
|
261 |
+
"step": 72
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.7628865979381443,
|
265 |
+
"grad_norm": 1.2813061475753784,
|
266 |
+
"learning_rate": 2e-05,
|
267 |
+
"loss": 1.1862,
|
268 |
+
"step": 74
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.7835051546391752,
|
272 |
+
"grad_norm": 1.8146921396255493,
|
273 |
+
"learning_rate": 2e-05,
|
274 |
+
"loss": 0.7589,
|
275 |
+
"step": 76
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.8041237113402062,
|
279 |
+
"grad_norm": 2.7170140743255615,
|
280 |
+
"learning_rate": 2e-05,
|
281 |
+
"loss": 0.4767,
|
282 |
+
"step": 78
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.8247422680412371,
|
286 |
+
"grad_norm": 1.401162028312683,
|
287 |
+
"learning_rate": 2e-05,
|
288 |
+
"loss": 0.7998,
|
289 |
+
"step": 80
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.845360824742268,
|
293 |
+
"grad_norm": 2.1656746864318848,
|
294 |
+
"learning_rate": 2e-05,
|
295 |
+
"loss": 0.5996,
|
296 |
+
"step": 82
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.865979381443299,
|
300 |
+
"grad_norm": 1.2385340929031372,
|
301 |
+
"learning_rate": 2e-05,
|
302 |
+
"loss": 0.6788,
|
303 |
+
"step": 84
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.8865979381443299,
|
307 |
+
"grad_norm": 1.43242609500885,
|
308 |
+
"learning_rate": 2e-05,
|
309 |
+
"loss": 1.3276,
|
310 |
+
"step": 86
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.9072164948453608,
|
314 |
+
"grad_norm": 3.322465658187866,
|
315 |
+
"learning_rate": 2e-05,
|
316 |
+
"loss": 1.2779,
|
317 |
+
"step": 88
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.9278350515463918,
|
321 |
+
"grad_norm": 1.1705447435379028,
|
322 |
+
"learning_rate": 2e-05,
|
323 |
+
"loss": 0.961,
|
324 |
+
"step": 90
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.9484536082474226,
|
328 |
+
"grad_norm": 3.3328497409820557,
|
329 |
+
"learning_rate": 2e-05,
|
330 |
+
"loss": 0.9414,
|
331 |
+
"step": 92
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.9690721649484536,
|
335 |
+
"grad_norm": 4.852980136871338,
|
336 |
+
"learning_rate": 2e-05,
|
337 |
+
"loss": 1.3991,
|
338 |
+
"step": 94
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.9896907216494846,
|
342 |
+
"grad_norm": 1.702784776687622,
|
343 |
+
"learning_rate": 2e-05,
|
344 |
+
"loss": 1.4331,
|
345 |
+
"step": 96
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 1.0,
|
349 |
+
"step": 97,
|
350 |
+
"total_flos": 1.9758677338619904e+16,
|
351 |
+
"train_loss": 1.00918516178721,
|
352 |
+
"train_runtime": 540.4351,
|
353 |
+
"train_samples_per_second": 0.718,
|
354 |
+
"train_steps_per_second": 0.179
|
355 |
+
}
|
356 |
+
],
|
357 |
+
"logging_steps": 2,
|
358 |
+
"max_steps": 97,
|
359 |
+
"num_input_tokens_seen": 0,
|
360 |
+
"num_train_epochs": 1,
|
361 |
+
"save_steps": 500,
|
362 |
+
"stateful_callbacks": {
|
363 |
+
"TrainerControl": {
|
364 |
+
"args": {
|
365 |
+
"should_epoch_stop": false,
|
366 |
+
"should_evaluate": false,
|
367 |
+
"should_log": false,
|
368 |
+
"should_save": false,
|
369 |
+
"should_training_stop": false
|
370 |
+
},
|
371 |
+
"attributes": {}
|
372 |
+
}
|
373 |
+
},
|
374 |
+
"total_flos": 1.9758677338619904e+16,
|
375 |
+
"train_batch_size": 1,
|
376 |
+
"trial_name": null,
|
377 |
+
"trial_params": null
|
378 |
+
}
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7e672830c3390cebc35186306c826e13e090e04ddeafd88ffc20e8522684105
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e16446a963b6b92d9b08fd590dd1a4cdf91041e770bce96813b10b4ed7f585e9
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf8a3f7300eed94ebeadf63f4fd1bc5a1a9e9ba2ff72874542fc0d228e1751b
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b92493e926b74fe46ad8c116cac0ce0f3b74ab7eab53772caa2f2d092cbe243
|
3 |
+
size 794708086
|
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ba67ca5277a23ddc688297378f77308686101a548c5d8ded5230ede369657d6
|
3 |
+
size 794706058
|