thkim0305 commited on
Commit
36aa57a
·
verified ·
1 Parent(s): aeccb48

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac6e3c20c9ce2b3bd22a4ec405cfe0e9c4823d415ddd9d0a1432c7f7a7821ab6
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9faab1abc5585336f9c5217cbf9229feae7f158d14775ec6b0465ecdcd6b72c
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b911d7298ebdb70e1147918b61c660749e1a0eb1428cf862812392dc3e79ee0
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c914f9fec69d46834a0247afa323ef2d204838bfc58478b2b70b659c6c7837
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43cd654d47bea4a0664e185ffcb76f37acdef0f55d2203b478aee301074cec65
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:482a35fcc3ebc3cf030093ca637eeb0b4f1a908aa49998263fd26a4f21738ec7
3
+ size 369838470
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abab9ede2af77a1461e2d522138e4f5e882bed4af399616ec0dd66577cfdc753
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f38813bfe5d6e1f323702ba4809bf6db6af166cb14a2717a8eb9f1ef775cdc1
3
+ size 369837282
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.2302707433700562,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6424,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.6121233701705933,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0052,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.4117467403411865,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.005,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 3.630777359008789,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.6888,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.9374276995658875,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.7314,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.9001209139823914,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6316,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.801906168460846,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9834,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.1132997274398804,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.3636,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.1756536960601807,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3419,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.3688271045684814,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8968,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 8.31472110748291,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.8141,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.499045491218567,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5582,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.175128936767578,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.2834,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.3025474548339844,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.898,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.6166173219680786,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.2673,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.9590756297111511,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.3175,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.4882543087005615,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.6644,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.2179570198059082,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7493,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.3930878639221191,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7374,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.189868688583374,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.9137,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.1282020807266235,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.6859,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.5559532642364502,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.528,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 2.0104541778564453,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9586,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 4.215638637542725,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.0792,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 3.2594962120056152,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.2841,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.7323676943778992,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.0893,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.5159685611724854,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.5317,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 4.473369598388672,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.2966,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 5.313100337982178,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.911,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.3561482429504395,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8127,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 2.3545758724212646,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7019,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 7.38244104385376,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.86,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.3104522228240967,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.3289,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 2.5590906143188477,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8105,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 4.937007427215576,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1369,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 6.499209880828857,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8859,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 6.662985801696777,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.5822,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.9580082893371582,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.1448,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.697409749031067,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.8685,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 9.267684936523438,
287
+ "learning_rate": 2e-05,
288
+ "loss": 2.1197,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.6946778297424316,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6537,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 4.137977600097656,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.5048,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 2.1147782802581787,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9791,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 2.045584201812744,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5647,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.7340083122253418,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.0669,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.2718918323516846,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.3045,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 2.3538527488708496,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7428,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.2346324920654297,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.5767,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5656240185147392.0,
351
+ "train_loss": 0.9453033073661253,
352
+ "train_runtime": 235.4567,
353
+ "train_samples_per_second": 1.648,
354
+ "train_steps_per_second": 0.412
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5656240185147392.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb57f4da4d9af2c8f88eec2c68cce411eb4ace1cd7b4b8c6cfcb2932e44e1af
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599c1c0f44c2450b50611cbf10979f3176c7fb5c97b9994e162e9afbe6589e1e
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61bc0585554474b7a6c7a46de7c089e6d367f8d84a3f2082dda108068613b90a
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a364994d7ccff9361b49b861cbbf4336eb34dd9e46c7921c637fa2c7a9532a6a
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d174170815e6f16f125a4b43e29ade7e900a8e7534df2954323e02bae74339
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085787736ea4d59641527424baeb286c6851230bafe4e1c92069427d0e2fe907
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4319cfba2f0947e6c39dae27f1f6e85fe0967d560cf49a26499af35aeba4b27
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3091e8b368a7015e0b43b5db1be0a826707a5eb9ae318b117c885ee90397875a
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.8781272172927856,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.8515,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 4.043600082397461,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.9452,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.0472830533981323,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6496,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.3015058040618896,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5628,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.2546836733818054,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5908,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.3884270191192627,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.1725,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.4381201267242432,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.827,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.5257450342178345,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.3457,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.5005308389663696,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.1501,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.9017939567565918,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.8049,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.4530093669891357,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.8718,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.9238858819007874,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.1926,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.5560870170593262,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8745,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 3.8156371116638184,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.4656,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.274550199508667,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.2095,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 2.0850868225097656,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.546,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.1981834173202515,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.5663,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 5.242424011230469,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.7796,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.4221488237380981,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.5759,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.140813946723938,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8574,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 2.981257200241089,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6415,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 2.198453187942505,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.9304,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.601679801940918,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4595,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.553512692451477,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9711,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.1878437995910645,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.7843,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.1170060634613037,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.3049,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 3.8993043899536133,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.1269,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 3.7384300231933594,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.3041,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 4.556199550628662,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.5492,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.3752775192260742,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0508,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 5.783763885498047,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.7019,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 1.1487605571746826,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7288,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 2.0315375328063965,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0151,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.0512689352035522,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.0146,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 2.663738250732422,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.2133,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.6531965732574463,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.8632,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 2.310678243637085,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.6255,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.3333702087402344,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.8103,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.0063538551330566,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.2381,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.6322288513183594,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.584,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.8914169073104858,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.0259,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 2.7294955253601074,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.9706,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 3.710840940475464,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.2648,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.3264449834823608,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.5923,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.280088186264038,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6826,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 0.8928223252296448,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.5134,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 4.573293209075928,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.0831,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 2.4279978275299072,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.0474,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.3528534647570432e+16,
351
+ "train_loss": 1.0155205480831186,
352
+ "train_runtime": 413.9788,
353
+ "train_samples_per_second": 0.937,
354
+ "train_steps_per_second": 0.234
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.3528534647570432e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e41781055739cec33a2f49d21003d32c53009bfc48cb66d73c87eaf6b9828f6
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:470dcb87433ec035468bec134fe6c4d50e8a8f668c2c833a26ea926f11dbf049
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1640ce7785ab8e8e7902397e1908cbfc1011e1d5154f0e12e336affd18cd2cf
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3338370d88d25ad5a15f7a9160a5b9c280ec60de889a0caceb9ece4f4b6237a4
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41bf4f1754b8137517a3a1f7c53e997d179bcddef4baab07d74c51e28d2ad3df
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7857f8fb11d2880bf457dd12ed9ca204bf7a5eef168bdcd8d1a2fc6735c0b71
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc09018e2a00817778162596d3f9610e71a8a13be41391723e59f72c0c2f0b8a
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0652fdc81a19c58e14361714085bcb4172d17ffa522ffa2967dd41e5f98fe357
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.2005271911621094,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5353,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 2.829289436340332,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5587,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 2.785386562347412,
28
+ "learning_rate": 2e-05,
29
+ "loss": 2.5733,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.3352482318878174,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3653,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 3.0877790451049805,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.2587,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.9194437265396118,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.605,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.778803825378418,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4535,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 2.219306230545044,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.733,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 3.0522265434265137,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.8598,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.7349960803985596,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6239,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 2.6168980598449707,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6977,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.6964139938354492,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5198,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 2.2305409908294678,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.6052,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 2.8250887393951416,
105
+ "learning_rate": 2e-05,
106
+ "loss": 1.6726,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.5621683597564697,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6449,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.7774029970169067,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3363,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.4394720792770386,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.1173,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.6797069311141968,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6117,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.983581006526947,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.2687,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.0993242263793945,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.9805,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 11.079167366027832,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.5928,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 2.051191806793213,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.5769,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 3.372947931289673,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.0517,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 2.3665897846221924,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.9332,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.36680269241333,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.1949,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.713635265827179,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3461,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 3.4997355937957764,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.4795,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 6.203523635864258,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.5504,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 4.314687252044678,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.7256,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 4.574437618255615,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6266,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 3.7229034900665283,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.861,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 2.443660020828247,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.6011,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.8502126932144165,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.4416,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.3321700990200043,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.1995,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 4.566008567810059,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.9874,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.5475733280181885,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.7815,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.5172197818756104,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.2529,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 5.836910724639893,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.6699,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.813737392425537,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9105,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 3.7902021408081055,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9401,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 5.073143005371094,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6575,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.7925168871879578,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2744,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.4323312044143677,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.7463,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.573714256286621,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.4023,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 2.421898126602173,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.4877,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.748384714126587,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.759,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 3.4596893787384033,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.4826,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 4.076857089996338,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.2546,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.5335589188468736e+16,
351
+ "train_loss": 0.8459088949813056,
352
+ "train_runtime": 452.4686,
353
+ "train_samples_per_second": 0.858,
354
+ "train_steps_per_second": 0.214
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.5335589188468736e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c72a9298e90c4a5026adeee75d51a6787039d4083de3ba15eb18a73fcf09a51
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e253250a37cf6cae5c4dcf7987efc8ebd71581af9371fd266869a69e9d0da10
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356b3acc7076436eb47f2e906faaa291cf6379c294c0851f177314d7b7c4e0c9
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d7b8b4e24255f6052e1820a7a5cf87dfb846bff90e6c974c30d5767be5af55
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aafa27a73b5f8aee55adc7d15f0e86119bd2d2862a123507854e0d1bf6829e4
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1af39dc03463a4ca2ff118e308fe0798e6a515c344a632c8f0d4f0b0e5acad8
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544fa4d1ceead9f40862e2891a05492c06dd059aa8a539bc151fd1380003f039
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385be361a93fce6f9feb2df719a26a9cc1c43ee1e5e4edfb0184de4ff6f6b13b
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 2.431029796600342,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0939,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.8722438812255859,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9552,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.1362252235412598,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6326,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.799323320388794,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.9866,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 1.1111527681350708,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.0202,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 1.7144205570220947,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.9727,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.9399757981300354,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.468,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.7109354138374329,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4818,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.847976565361023,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.0419,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.183365821838379,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.4441,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.9391213059425354,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.4735,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.10097850859165192,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5644,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.9321213960647583,
98
+ "learning_rate": 2e-05,
99
+ "loss": 1.1182,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.5129534006118774,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.9117,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.2945747375488281,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.3779,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.1420668363571167,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8766,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.9086567163467407,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8392,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.51237952709198,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.2022,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.0951957702636719,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1245,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 4.724721431732178,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.3685,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.6724278926849365,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.7964,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.3896200954914093,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2359,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.6976099014282227,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5979,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.113963007926941,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.7774,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.4955312013626099,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.999,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.4446187019348145,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.3719,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.4187099933624268,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.8836,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.9385492205619812,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.1007,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 1.1563595533370972,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.4014,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.551350474357605,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6619,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.1922292709350586,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.5347,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.8667466640472412,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.7007,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.0623116493225098,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.5061,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.04590483754873276,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3285,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.00609290599823,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.3451,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.5188184976577759,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.6001,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.9316257238388062,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.6777,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.65702223777771,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7453,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 1.313071370124817,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.4345,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.6534919738769531,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.5735,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.32963377237319946,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.1275,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.9862601161003113,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.7405,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 4.502978801727295,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.8153,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.4814475774765015,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.8128,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.8578946590423584,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.6065,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.3685534000396729,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.4277,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.3430529832839966,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7456,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.8663636445999146,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.4437,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2.0809997453623296e+16,
351
+ "train_loss": 0.7489266002301088,
352
+ "train_runtime": 524.2746,
353
+ "train_samples_per_second": 0.74,
354
+ "train_steps_per_second": 0.185
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2.0809997453623296e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc7c7424c2df6afa2b5bae3653afdaaad3158911d70013ce6f4f9dcbb554ba0
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c2424d1e1a49a8f746c7d30ac918d879bbbcae4726650c61cc996ef7b1fe0a8
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc9d7da842da98082cf9c2c2fca77b3a7bd5877e8c31ae27e665ca686e55176
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eca367e33592f4a5962682369f04fb7f40d715e6eaa4faa4d68bee854a8f08a
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca4fb55c66b3552c4255c8d6323f4e27baabe1cec491853761c693625301042
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c042ed992d8bc718a05f2f3ff9467b2ccb0dd25243c0e9c40dcfefb933fd495
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9f9bbe51dc5519112adbd5447500bf5fef833ee61df5554c859782348e78b5
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ff0b5147bbb9f48b017adae43e8b62e0bc9529077070959e498cc382f5470e
3
+ size 794706058
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.3599570095539093,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2675,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.1936863660812378,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9035,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.3883532285690308,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.6571,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 2.7608344554901123,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4303,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 5.371406078338623,
42
+ "learning_rate": 2e-05,
43
+ "loss": 2.1826,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.3308653831481934,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.6657,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.042726580053567886,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.2714,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.8078577518463135,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.893,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.4273505210876465,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.9524,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.442458152770996,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1022,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.48077863454818726,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.7807,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.495241641998291,
91
+ "learning_rate": 2e-05,
92
+ "loss": 1.4534,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.614429235458374,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9892,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.6893081665039062,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.5477,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 2.3914456367492676,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.8496,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.6967483758926392,
119
+ "learning_rate": 2e-05,
120
+ "loss": 1.0244,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 3.545107364654541,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.1615,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.4850765466690063,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.1958,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 5.655995845794678,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.1793,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.4371092319488525,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.8878,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.0520563125610352,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.431,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 1.6813929080963135,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.4381,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.9094098806381226,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9001,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 1.8786391019821167,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3896,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 4.317810535430908,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.2674,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.5854238271713257,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.2163,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 4.147825717926025,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.2768,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.2678698301315308,
203
+ "learning_rate": 2e-05,
204
+ "loss": 1.0032,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 2.2713613510131836,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.7791,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 4.81284236907959,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.5097,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.9140543937683105,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7074,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 2.5636491775512695,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.8297,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.8870617151260376,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.2714,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.2541420459747314,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8347,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.135138988494873,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.5559,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.9652976989746094,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.1556,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.2813061475753784,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.1862,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.8146921396255493,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.7589,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.7170140743255615,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.4767,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.401162028312683,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.7998,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 2.1656746864318848,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5996,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.2385340929031372,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6788,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.43242609500885,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.3276,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 3.322465658187866,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.2779,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.1705447435379028,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.961,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 3.3328497409820557,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.9414,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 4.852980136871338,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.3991,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.702784776687622,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.4331,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 1.9758677338619904e+16,
351
+ "train_loss": 1.00918516178721,
352
+ "train_runtime": 540.4351,
353
+ "train_samples_per_second": 0.718,
354
+ "train_steps_per_second": 0.179
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 1.9758677338619904e+16,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e672830c3390cebc35186306c826e13e090e04ddeafd88ffc20e8522684105
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e16446a963b6b92d9b08fd590dd1a4cdf91041e770bce96813b10b4ed7f585e9
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bf8a3f7300eed94ebeadf63f4fd1bc5a1a9e9ba2ff72874542fc0d228e1751b
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b92493e926b74fe46ad8c116cac0ce0f3b74ab7eab53772caa2f2d092cbe243
3
+ size 794708086
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_Aorthensure_Brand_saveoptim_lr2e-5_sc132_4tasks_5rounds_fixit97_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba67ca5277a23ddc688297378f77308686101a548c5d8ded5230ede369657d6
3
+ size 794706058