thkim0305 commited on
Commit
ad2c972
·
verified ·
1 Parent(s): ae71b7c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
  2. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
  3. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
  4. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
  5. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
  6. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
  7. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
  8. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
  9. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
  10. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
  11. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
  12. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
  13. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
  14. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
  15. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
  16. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
  17. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
  18. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
  19. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
  20. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
  21. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
  22. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
  23. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
  24. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
  25. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
  26. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
  27. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
  28. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
  29. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
  30. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
  31. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
  32. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
  33. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
  34. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
  35. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
  36. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
  37. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
  38. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
  39. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
  40. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
  41. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
  42. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
  43. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
  44. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
  45. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
  46. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
  47. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
  48. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
  49. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
  50. client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b96c362a78aed601019971f00d6027bed72a085f801c5708c469b30b56e704
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa113a087044d45e9458ac9c0cc467fc5d4efc7adf19f478aa37bcbf0caaf2f
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e235f913c8f73283f7d46940c3d7efa2db6ba791511a72d27267d2ce1e562533
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38483e00ef8f3a3423e44729285077bc1efbc42c6112450c0be7069f15fde5fc
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424f036eb3473b0625a9b07031fccf681a4aa443d3d85c2dee0a124fc093d62e
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb30d25ae19dd6ca1f01a3f066d39b79c10c0ae3937a2722a15f790a8acc3da
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16e8b9983821b40d2bccba0cb8aceacdc83d0f24b2a9d503b06bbd0e29d536c0
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce653e9bb0f893fc5b3c40b624e4375f73e384bfee5a88fcfe1d8838f649f6a2
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.9091740846633911,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2221,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 3.158586263656616,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.0034,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 0.46037667989730835,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.0698,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 4.0920891761779785,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6852,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.10336245596408844,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0561,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 2.5557003021240234,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5174,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.9013806581497192,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.3884,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.5343479514122009,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0633,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 1.3988330364227295,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.4331,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.7816677093505859,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1989,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.7695223093032837,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.3803,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 2.075623035430908,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.6594,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.132662296295166,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3909,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.9386407136917114,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.9071,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.1776388883590698,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1846,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 2.3658692836761475,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.2869,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 1.8401161432266235,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.4268,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 6.151878356933594,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.8957,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.4062762260437012,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1458,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.788042426109314,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1687,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 3.1902451515197754,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6988,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 4.431184768676758,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.4197,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.8136569857597351,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0892,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.2823968529701233,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.3095,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 2.880483627319336,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2591,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 5.056119918823242,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8428,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.07207974791526794,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0128,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.1768360137939453,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.1406,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 3.5694682598114014,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.5909,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.23515310883522034,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.325,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 0.7991934418678284,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.2351,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.023807095363736153,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0073,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.7518260478973389,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0999,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 2.0427582263946533,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.176,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 2.179276466369629,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.139,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.9427316188812256,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0631,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 4.460165500640869,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.643,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 3.386460542678833,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.632,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.405658483505249,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.2599,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 4.817083358764648,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.834,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 5.088555812835693,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4744,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.05794494226574898,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6645,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.06005644053220749,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.3662,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.3105262219905853,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.374,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.043837547302246,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.2253,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 3.3620216846466064,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0345,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.1824847310781479,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.1119,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.09524671733379364,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.1297,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2866814929338368.0,
351
+ "train_loss": 0.4281441053164374,
352
+ "train_runtime": 417.1615,
353
+ "train_samples_per_second": 0.93,
354
+ "train_steps_per_second": 0.233
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2866814929338368.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5b1b07a962367241a2790e3b47841d377d0a5859a24e0bd572bbb2e9a5dad4
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcaa8b85b90be439bd3bf93fa1b50be8a1a524c0e2551727713f22186123d1f0
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad9f7eab0ddc66d1c63eaf3d04cce2d870464e591c10a0a0cd59c04943163477
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a74dcff81385ce7d01fd715bfe9cd9115994a33b4f2116a5a42c2e3b9691c0b7
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7215205419c641a43021b684e80c528adf84563ea906d6c2e68c6843e62d71
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ebdc8a3d437a1ef847589521c74c09d29530143a00ceb9bef8a458f47d6c12c
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bb1653c987aae96fe205a4517a078a0ac54e4b545c86cea875b9dabae7b576
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0793485bc61d7c2e0c75e7ca0f596a805ee2682aaf4bbb6109607da5e979984
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.00990249216556549,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0343,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.24752184748649597,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0285,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 0.11016274988651276,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.1125,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.015819694846868515,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0193,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.0025153865572065115,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0022,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.018475506454706192,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.002,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 5.484030723571777,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5337,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.01962272636592388,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.1908,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.02493014559149742,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0014,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.09554272145032883,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.71,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.007511932868510485,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0032,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.6068776845932007,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0584,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 0.019293755292892456,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.0174,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.006605098024010658,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0035,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.38141685724258423,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.4474,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 0.03302409499883652,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.0047,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 6.83150577545166,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.324,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.10726940631866455,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0058,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.44090333580970764,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.0431,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.19889701902866364,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7427,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 1.034143090248108,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0863,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.22705499827861786,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.357,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.7536527514457703,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.1098,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.056017301976680756,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0064,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 0.006744038313627243,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0006,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.9016902446746826,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.1382,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.11894352734088898,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0104,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.39221423864364624,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.032,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.09858262538909912,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.049,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.005559508688747883,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.1089,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.6431413888931274,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.1877,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.004421388264745474,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0127,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 0.002455125330016017,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0007,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 2.288174629211426,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.593,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 0.5138911604881287,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.7277,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.2915093004703522,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0794,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.012635215185582638,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0644,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.5793916583061218,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.2267,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 2.189505100250244,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.6962,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 0.004708629101514816,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.0011,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 0.007471214048564434,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0018,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.014228110201656818,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0012,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.01875714771449566,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0231,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.010902749374508858,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0009,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.6271221041679382,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0795,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.3592969179153442,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.106,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 0.4871022403240204,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0463,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.0031653214246034622,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.0005,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2888162800041984.0,
351
+ "train_loss": 0.1450096553133935,
352
+ "train_runtime": 415.6208,
353
+ "train_samples_per_second": 0.934,
354
+ "train_steps_per_second": 0.233
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2888162800041984.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117c5018b1144fbe01a8a0b3e5b5f28be996c6cd1ced5dd5ebae894b7b20764f
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46882a8e42fbde28008f5736f9a52d0589cd87b83849c4fd9c2719a7737d7caf
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0198020029a280ceab5c502fcc1630094eccd304142cbe991b7716d5e76909
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb825c00a750f5b116888f6e06e8a0f1899b7080a2228e4f85b242dabe7b805
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0285e24aa92adaeb50c6942d77c9baad30255eac5a6072995e1689b161fae85e
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc759a1821037662e3bc31ea98126c55b6efe62a25eff727fe0d32a32c1ecc96
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a25b0ef58db4a2b67f05037fa400effaa9f3be59bae025b818883e74667a446d
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e176a28f5e8d10fdc2c78c18da653d7d6816ea6950bedf534d204526cd4a1e
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.143397331237793,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5443,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 1.2255475521087646,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.9587,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.8370898962020874,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.182,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.24408237636089325,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.2572,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.2356729656457901,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.3236,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.12980803847312927,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.359,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 0.34007528424263,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.1969,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 0.8254105448722839,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.7104,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.665677011013031,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.2014,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 0.9692237973213196,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6621,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 0.43489089608192444,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.6234,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 2.537994861602783,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.9041,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.01339590549469,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3681,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.37009069323539734,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4056,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 0.044184956699609756,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.1972,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.3115153312683105,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3582,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.7034705281257629,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.2604,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.5956244468688965,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.9219,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 1.4682765007019043,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7098,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 2.516416311264038,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1477,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 2.4257423877716064,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0164,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.3192073404788971,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.3794,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 2.1369898319244385,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.5324,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.22400034964084625,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.7004,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.9823366403579712,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.9041,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 1.1424986124038696,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.2122,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.1508723497390747,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.2551,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.047311544418335,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3889,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 0.28297775983810425,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.2793,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 0.2760946452617645,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.4845,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 3.4843385219573975,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.725,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.1845403015613556,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.2025,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.3841183185577393,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.6707,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.6219921708106995,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.3708,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.2532652616500854,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.6669,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 1.8839783668518066,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4824,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 1.2260621786117554,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.8137,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.1083022356033325,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.9023,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.7099221348762512,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.3594,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.9942265748977661,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.4561,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 3.795022964477539,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.0328,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 0.9263180494308472,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6605,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.9401521682739258,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.3343,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.5276206731796265,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.6242,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 2.310457229614258,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.854,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 1.974561333656311,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.8861,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 2.190361499786377,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.0974,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 1.6531275510787964,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.7672,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 5270224836231168.0,
351
+ "train_loss": 0.6526356726577601,
352
+ "train_runtime": 527.3005,
353
+ "train_samples_per_second": 0.736,
354
+ "train_steps_per_second": 0.184
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 5270224836231168.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f38637be273d636ae4bf42e45ac88bd68d613b9db39c4011385158e57cfc82
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15d4a001460770d6133cfe80cb6b1118d692eac140ab64b0b5dbe135a59035d9
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79870074898564ea9937eef2cb84f972f35f15ef60ff83d4d5e9221c910c1649
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5072ca902f0faeb5daf4f968b0859f0fd9c37293147851a3bf30d319ee022741
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b72835c7dcc77117ee65d0b8a56db8bcef201336153949791586c6cf628dbb
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5da7ae67d07bffcb1ca91e721216e9bb57862eaf28b3fd5621c55e61375a5e0
3
+ size 606590838
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0ddcc5243cf0cbb1c7689c4e85a09a645f7f24a043fbffea59babe8a120ff12
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38eb64e42265ae8776bbe390aeea5b07ff1fa63c9ecc1aeae053eae53259c4c
3
+ size 606588810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 0.8621726036071777,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.784,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 0.5997222661972046,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6114,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 3.3685293197631836,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.8636,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 1.9444336891174316,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.7377,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 3.8980281352996826,
42
+ "learning_rate": 2e-05,
43
+ "loss": 2.0884,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 3.079683542251587,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.1494,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 3.4189367294311523,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.9661,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.382783055305481,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0355,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 5.828529357910156,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.5671,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 2.367591142654419,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8609,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 1.9332449436187744,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9275,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 1.3459978103637695,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.8898,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.5858796834945679,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3414,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 1.9047499895095825,
105
+ "learning_rate": 2e-05,
106
+ "loss": 3.4836,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 3.298762321472168,
112
+ "learning_rate": 2e-05,
113
+ "loss": 1.4902,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.7490943670272827,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.759,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.9601039290428162,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.5406,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 1.696427345275879,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5591,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 4.497976779937744,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.48,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 1.4220675230026245,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.9096,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.5704763531684875,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0739,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 3.798910140991211,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.8083,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 0.6530976891517639,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.3467,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 2.5659232139587402,
175
+ "learning_rate": 2e-05,
176
+ "loss": 1.2449,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.7021549940109253,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.991,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 2.904054880142212,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.9027,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 0.40748944878578186,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.3164,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 1.6402074098587036,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.4264,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 3.439208984375,
210
+ "learning_rate": 2e-05,
211
+ "loss": 2.0284,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 2.8119544982910156,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.5881,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.5405571460723877,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9022,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 4.333232402801514,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.2758,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.6036462783813477,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.7932,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 0.596666157245636,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.7625,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 2.8570806980133057,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.7929,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 2.321223020553589,
259
+ "learning_rate": 2e-05,
260
+ "loss": 1.2169,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 4.413544178009033,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.3534,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 1.6249202489852905,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.6887,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 3.379758834838867,
280
+ "learning_rate": 2e-05,
281
+ "loss": 2.4109,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.7662537097930908,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.8549,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 2.5819923877716064,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6496,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 2.989832878112793,
301
+ "learning_rate": 2e-05,
302
+ "loss": 1.1886,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 1.8118072748184204,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.149,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 1.4145994186401367,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.8608,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 1.3375070095062256,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.9514,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 5.697417259216309,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.387,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 3.187375545501709,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.7515,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 4.534388542175293,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.2819,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 2928335550152704.0,
351
+ "train_loss": 1.1240325613120168,
352
+ "train_runtime": 413.8678,
353
+ "train_samples_per_second": 0.937,
354
+ "train_steps_per_second": 0.234
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 2928335550152704.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ede28fdd2cede10ccf106490cbc2da115e8236fc942f0fc8573f1f3127100ea
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41cc09a59ab65452ba45d80dcde938e20a6d1c74313d8d819237825ecc5c129
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0231971052f8cadaf5af44030be3e4a0d1119e6cd551875513b30a19e2ba16ae
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03f1b6ddfea3320be70db4eec5aaea28b3cb748e1b15df3c1c0241f193f5de4
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c540936a1e1cfa6e1e8ac3eab7ab2ad6ef4730a3d071d311b3d2e6888cfda4f
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b455529478842b55f61e1722446272f3df530a1f55a1a44c5e36df6879739386
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c4f909649374c7493c39988abbb17b7a6ec2b86497510258894a8ca3398f0e
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6fd9c6fe317a9688edc469a71b86ddcb698e23fbe087692d2d4d5f8bda91a5
3
+ size 978818810
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 97,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.020618556701030927,
13
+ "grad_norm": 1.9402724504470825,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.5838,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.041237113402061855,
20
+ "grad_norm": 2.4250800609588623,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.2304,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.061855670103092786,
27
+ "grad_norm": 1.5540610551834106,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.3604,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08247422680412371,
34
+ "grad_norm": 0.7538492679595947,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6599,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.10309278350515463,
41
+ "grad_norm": 0.9257169365882874,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6932,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12371134020618557,
48
+ "grad_norm": 0.5299786329269409,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.5977,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14432989690721648,
55
+ "grad_norm": 1.8260445594787598,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.7848,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16494845360824742,
62
+ "grad_norm": 1.0459568500518799,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.423,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18556701030927836,
69
+ "grad_norm": 0.7857280373573303,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5644,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.20618556701030927,
76
+ "grad_norm": 1.1462950706481934,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.446,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.2268041237113402,
83
+ "grad_norm": 2.205085039138794,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.4026,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24742268041237114,
90
+ "grad_norm": 0.7716929316520691,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.312,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26804123711340205,
97
+ "grad_norm": 1.7518846988677979,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9049,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28865979381443296,
104
+ "grad_norm": 0.8513244390487671,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.7163,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.30927835051546393,
111
+ "grad_norm": 1.708772897720337,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.848,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32989690721649484,
118
+ "grad_norm": 1.2805875539779663,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.8176,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.35051546391752575,
125
+ "grad_norm": 0.7299635410308838,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.3226,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.3711340206185567,
132
+ "grad_norm": 0.9453825354576111,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.4467,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.3917525773195876,
139
+ "grad_norm": 0.4364389181137085,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.4499,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.41237113402061853,
146
+ "grad_norm": 0.4604692757129669,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.7041,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.4329896907216495,
153
+ "grad_norm": 0.42262014746665955,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.5528,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.4536082474226804,
160
+ "grad_norm": 0.2717072069644928,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.2196,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.4742268041237113,
167
+ "grad_norm": 1.1014915704727173,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4718,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.4948453608247423,
174
+ "grad_norm": 0.48280566930770874,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1461,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5154639175257731,
181
+ "grad_norm": 1.16571843624115,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.7124,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.5360824742268041,
188
+ "grad_norm": 0.14320193231105804,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.7823,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.5567010309278351,
195
+ "grad_norm": 1.8387656211853027,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.3319,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.5773195876288659,
202
+ "grad_norm": 0.9942914247512817,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.4071,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.5979381443298969,
209
+ "grad_norm": 1.0944980382919312,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.617,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6185567010309279,
216
+ "grad_norm": 1.0097112655639648,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.2987,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.6391752577319587,
223
+ "grad_norm": 1.1440106630325317,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.7794,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.6597938144329897,
230
+ "grad_norm": 0.9374860525131226,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.737,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.6804123711340206,
237
+ "grad_norm": 1.6988123655319214,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.7242,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.7010309278350515,
244
+ "grad_norm": 1.1150574684143066,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2495,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7216494845360825,
251
+ "grad_norm": 1.318420171737671,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.0947,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.7422680412371134,
258
+ "grad_norm": 0.7494012713432312,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4376,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.7628865979381443,
265
+ "grad_norm": 0.5291847586631775,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.1724,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.7835051546391752,
272
+ "grad_norm": 0.8617143630981445,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.552,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.8041237113402062,
279
+ "grad_norm": 0.4218270480632782,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1633,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8247422680412371,
286
+ "grad_norm": 1.3452715873718262,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.8789,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.845360824742268,
293
+ "grad_norm": 1.967393398284912,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.4333,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.865979381443299,
300
+ "grad_norm": 1.4400347471237183,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.9299,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.8865979381443299,
307
+ "grad_norm": 0.30658483505249023,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.0109,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.9072164948453608,
314
+ "grad_norm": 0.16921880841255188,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2108,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9278350515463918,
321
+ "grad_norm": 0.4752196669578552,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.4654,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.9484536082474226,
328
+ "grad_norm": 2.074636936187744,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.8971,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.9690721649484536,
335
+ "grad_norm": 1.7106537818908691,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.0818,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.9896907216494846,
342
+ "grad_norm": 0.6808597445487976,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.6208,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 1.0,
349
+ "step": 97,
350
+ "total_flos": 6081635527163904.0,
351
+ "train_loss": 0.6636996711652303,
352
+ "train_runtime": 526.4308,
353
+ "train_samples_per_second": 0.737,
354
+ "train_steps_per_second": 0.184
355
+ }
356
+ ],
357
+ "logging_steps": 2,
358
+ "max_steps": 97,
359
+ "num_input_tokens_seen": 0,
360
+ "num_train_epochs": 1,
361
+ "save_steps": 500,
362
+ "stateful_callbacks": {
363
+ "TrainerControl": {
364
+ "args": {
365
+ "should_epoch_stop": false,
366
+ "should_evaluate": false,
367
+ "should_log": false,
368
+ "should_save": false,
369
+ "should_training_stop": false
370
+ },
371
+ "attributes": {}
372
+ }
373
+ },
374
+ "total_flos": 6081635527163904.0,
375
+ "train_batch_size": 1,
376
+ "trial_name": null,
377
+ "trial_params": null
378
+ }
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1ca40ac7a86695907ed50ab92b3322e850ce3c6b20d88160df8fea655233b1
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd30b73f84f40cfc88d63e9fc388b5f992dd60422583b803e4554908460b5e99
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7daa10dfe387d89f37538ddaf13b254637fcf67fec290c83210cf4e18f936ec8
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84e1d84adca8d5f3beff7b2330eaacb77d10fc1382a4a63da515bd4d30c4402e
3
+ size 978821398
client_states_feddualMultipqfullfreeze_homoAgg_moe_T05_freq10_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace73a466487a0e201e84be96ae47e72b535f683a7e17c5ddf448191f327469e
3
+ size 978818810