thkim0305 commited on
Commit
08c4bfe
·
verified ·
1 Parent(s): c6c0078

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
  10. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
  19. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
  28. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
  37. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
  46. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a4d0d6615d2510711f99c6b666e858a2598446d3263d37eed691a52c48cafa
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8467983a8faf53ac385f579b43f293895165ec3553aa52dcb326de610583d6a0
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5bf18123b293dbbba941833be776184ede92ebbb3304e9365d5bf968ecc50b6
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35940a8c8eddd6b7454570d0e6c61016292d9d2e5197647f71eb2ddcdb093c33
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4a6fb5a88281d0acef0944c1cc603e502dfe0190a9f8f36296a7c9476d566a
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468aaa38b2dbdf861f538b48b6969482f6d8913aeac15e921782031762a86c89
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed2f8b54154e62827fc7ccb130532f5449940421549b41b08b74e89ff8a3b4c
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81889dbecd6a4ea0784812af1ca62a08caa07cf01b024f7335fd93dd392681c9
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.4507830142974854,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.2925,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 3.2782809734344482,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5226,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.6665804386138916,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.4469,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 6.7659735679626465,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.6084,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.461778163909912,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.6173,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.8903517723083496,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.095,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.321805000305176,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.4503,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.4119670391082764,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.3114,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.858500003814697,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3571,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.7888195514678955,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8946,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.9685637950897217,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1218,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 6.1121015548706055,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5919,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.337240219116211,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.366,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.253252983093262,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4007,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 8.691818237304688,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.9219,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 4.221813201904297,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3515,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 6.260042667388916,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.8002,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 11.410113334655762,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.451,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.6883702278137207,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3644,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.033090353012085,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.2821,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 6.383613109588623,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.392,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.7196388244628906,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1335,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 9.112273216247559,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.9183,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 4.017404556274414,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.5077,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.14292454719543457,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.323,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.870044469833374,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.679,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 7.838882923126221,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.5556,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.5267337560653687,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.6559,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.8004374504089355,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1925,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.9164693355560303,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3563,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 10.06078815460205,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.1676,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 4.685103416442871,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4408,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.2955524921417236,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.1538,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.193854331970215,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8281,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.9019300937652588,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1189,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 10.440985679626465,
259
+ "learning_rate": 2e-05,
260
+ "loss": 2.1768,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.911909818649292,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.7279,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.03667999058961868,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.115,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.2721805572509766,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1444,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.097175121307373,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2108,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.7106596231460571,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0699,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.563626766204834,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.1482,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.34837377071380615,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0672,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.41047564148902893,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3899,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.7772120833396912,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0882,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.9385977387428284,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.0358,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 6.794859886169434,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1388,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 8.23221492767334,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3678,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.18338899314403534,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.2155,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.3215850591659546,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0276,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2053446886752256.0,
365
+ "train_loss": 0.5718875336647034,
366
+ "train_runtime": 100.3201,
367
+ "train_samples_per_second": 3.987,
368
+ "train_steps_per_second": 0.997
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2053446886752256.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9983a1b6c779a4d52cdc627f04f956d61efac456204bcc4dabada42d9a3dfdd
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2de129e62beeb4971790b5e1f46c5a709dcf59503e9475b5787bf0cd581368e
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ed512b6dcd1a66d95d053edadd3e3fbdf255ba36752a7ade6a545f7a8c1654
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4219da9ea85c1442677e975ed2d3f527a357245f0998ca76a59bd078777e3fd
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ceea20bd9bb93c4ec0fd7ee4511cfb48fbb6ab8dac82e3d4240b59dba7c865
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fac5c439ec99857d8e543ff416a2fd33e4648cf7a54164d4ff91a2487ca1202e
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca3cc56d5d8ef6bf8ceb6673227c86c6fdfa5c11342ee85b31de747c9ee1296
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf892a9fd738f4567b9929fca404c0a7a73c02558fbe6ed6dc4303c2e51f42b8
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.540980339050293,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.422,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.08287880569696426,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0048,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.329127788543701,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.2607,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.11668016016483307,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0061,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.266418218612671,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0361,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.0858792066574097,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0498,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.4955309629440308,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0903,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 3.658975839614868,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4419,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.029739608988165855,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0057,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.04279458895325661,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0027,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.1662931442260742,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0642,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.0573233366012573,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0731,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 4.016772747039795,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3845,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.2739083766937256,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0173,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.004112154711037874,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5049,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.817938208580017,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1084,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 0.040277149528265,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0015,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.03433386608958244,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0028,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.3714048862457275,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1396,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.03902699798345566,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0027,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.17097237706184387,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.006,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 0.004333522170782089,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.0081,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.13571485877037048,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.006,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.31877508759498596,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0122,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.09019943326711655,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0051,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 10.819501876831055,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.0651,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.5622828602790833,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0183,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.3623190224170685,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0462,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 0.40045422315597534,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.1874,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.0034316980745643377,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0095,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.01208802405744791,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0055,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.01345849223434925,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0033,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.6658453941345215,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0433,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.384672164916992,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2239,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.07835771143436432,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0073,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.4132821559906006,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0383,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.9961208701133728,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0747,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.9463048577308655,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0433,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.01034417562186718,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.0328,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.0005425452254712582,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.3519,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.04632725194096565,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0369,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.2183465957641602,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0654,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.009153845719993114,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0014,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.8759830594062805,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0918,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.031464505940675735,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0014,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.038530562072992325,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0023,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.04050149768590927,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0215,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.009601338766515255,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.001,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.02644410915672779,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0018,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 13.793920516967773,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.8695,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2069576665792512.0,
365
+ "train_loss": 0.11801103919744492,
366
+ "train_runtime": 100.2591,
367
+ "train_samples_per_second": 3.99,
368
+ "train_steps_per_second": 0.997
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2069576665792512.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03582a65c5f942f662a8eafdf27eccb06167fe68a235376a577c83b61464520f
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f25cc37e7b0ba30193079fe56dc4c3aa9b011c90f9b100c4edb8af121e24a47
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e73eded9bd524f1dce2d82e0dcd45a995aec36460f360989c7f0a3633f9064b
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f015a9439fe81810ee686e3def97f0a5628eb03eeb6647f2361e14acb54c6f1
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1d2a8a90b6e9f82b7cd6b9d41f2426100a7a948c8be73b9cd03822077c3984
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff05ff0a2fd64497a4dc87555b334386e1340bf51e0cb26b9563e5605c8b120e
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb0d68b551454c4d09eebc6d9d9cedb828570c4d6224ed8d949afef34928fd5
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e31665b8c7d78dc560352ba149d087420a03bb9929818edc19a4016a5f1b8ba
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 6.307283401489258,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.5954,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.8216760158538818,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.278,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 7.7118024826049805,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.2105,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 6.2828192710876465,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.0291,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.8280245065689087,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.3982,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.9868451356887817,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.2771,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.3482744693756104,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6548,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 1.1375393867492676,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.6873,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.463499903678894,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6526,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 5.554112911224365,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.2926,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.0732632875442505,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.461,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.5138278007507324,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.7348,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.534879446029663,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8571,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.698505163192749,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.521,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.4479676485061646,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3902,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.2816400527954102,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.4518,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 5.400059223175049,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9999,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.5884220600128174,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.3547,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.3713040351867676,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7269,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.25373563170433044,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0746,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.246715068817139,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6696,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.6761362552642822,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6276,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.399017572402954,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9067,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.692711591720581,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1943,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.116909146308899,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2336,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.456255555152893,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.1465,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.06800251454114914,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.2881,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 4.372195720672607,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.6529,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.281543254852295,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.313,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 3.5601418018341064,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3216,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.4214577674865723,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.1358,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.7352545261383057,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4594,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.6488208770751953,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.1297,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.18936391174793243,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2297,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.24005119502544403,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.2341,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.6526658535003662,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4384,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.2854243516921997,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.2658,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.8762297630310059,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.5192,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 8.563094139099121,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.3001,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.5608336329460144,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2274,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.9091539978981018,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.2695,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.8625966310501099,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.2104,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 3.4447784423828125,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5892,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.39084768295288086,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.2646,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 4.247977256774902,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.3688,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.8638454079627991,
329
+ "learning_rate": 2e-05,
330
+ "loss": 2.1514,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.26021599769592285,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0341,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.017536163330078,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2675,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.068211793899536,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.256,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 6.009552478790283,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.8954,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 4915713315700736.0,
365
+ "train_loss": 0.6049574661254883,
366
+ "train_runtime": 165.4178,
367
+ "train_samples_per_second": 2.418,
368
+ "train_steps_per_second": 0.605
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 4915713315700736.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ec9a76aa2a365afb2403325260e12dec27ad4b984f60c9e7f5a9f76ebbdad1
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a23a2ed8e03fd3a4b21c1fea3e90f4eb1c0a3a99bedee884c388ca90b15211
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70680ac43b83326240e7835655831bc4ceaf9f0de955840f5c6d0c1f4afcc2f1
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e4b81455da8eff19455dde4b74de4297a754f18d10b63e691a538c6b9946a3f
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9b733c0813536ef08508eb156501dcb5bd61a3091f43cb3f3f6010fe4b4917
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e03eca3be62c25b2a2aa30e419a67ca025067a04b2c6b04bf385daaae97ddbd
3
+ size 184221358
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3787e3f8abd12f49c6d80d3cdcdede3a2f53808976bc36599bbc4428eea38f7
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6adb60b55506bfdd98859d59018a85cefb09d6e024b8fa128b3beb888dc5315
3
+ size 184220842
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 2.0921337604522705,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.6894,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 4.510809421539307,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6862,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.531048774719238,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.2015,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.931663751602173,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5979,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 6.292150020599365,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.058,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 4.606175422668457,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8743,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.087507963180542,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6243,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 8.593318939208984,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4402,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 5.68522834777832,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.3279,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 1.9175809621810913,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.7962,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 8.656201362609863,
84
+ "learning_rate": 2e-05,
85
+ "loss": 2.0415,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.99509859085083,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.2472,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 7.396963119506836,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8892,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 5.109688758850098,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4696,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.3529930114746094,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6545,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.4711360931396484,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5218,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.621761322021484,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9679,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 3.575629472732544,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5973,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 9.701947212219238,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4922,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 18.55413818359375,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.1465,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 7.169260025024414,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.9151,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 7.758701324462891,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.3213,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 7.772892475128174,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.4431,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 3.169400215148926,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9307,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 6.446079730987549,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.4162,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.463912487030029,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8901,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 7.208362102508545,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.4832,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 11.086746215820312,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.0512,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 6.340665340423584,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.0799,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 2.8643109798431396,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.5789,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 7.802653789520264,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.8293,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 4.388473033905029,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.0891,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 4.431455612182617,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.9788,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.8305065035820007,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.6982,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 14.678071975708008,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.9248,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.8914413452148438,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5398,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.2980284690856934,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.6722,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 4.637665271759033,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3349,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.5008809566497803,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.134,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.6805694103240967,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.935,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 7.456270694732666,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.5703,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.0763790607452393,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.3613,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 8.599203109741211,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.9858,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 7.865464687347412,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.5265,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 7.330792427062988,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.392,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 5.383084774017334,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.248,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 6.0959649085998535,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.9657,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.155146598815918,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8844,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 5.614543437957764,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.4389,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 8.140727043151855,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.3601,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2098644891205632.0,
365
+ "train_loss": 1.066049919128418,
366
+ "train_runtime": 99.4099,
367
+ "train_samples_per_second": 4.024,
368
+ "train_steps_per_second": 1.006
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2098644891205632.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3506ffc3670759125f1d1c08b6420c1bf38b2f186b977f9cbb77ec8fcbeea88e
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be4b6efdf8aeda1d55eab73770ec3ff6a9a14501a2aa60e833ec969ed086533
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90585364faf7ecfbee025db3a036cc013caf849e1bed9b8c5d1c4570c6065777
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9236b8e15ebe274a58931ff42727de3f819bbb0f8493426399ff78877af9de2b
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a161db119e3dd4b0ad54e2dc5dae4212100d532c0ec87749b3eb4d37f395bf48
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b3859505775fae6ab29fd2053ab49c87ca8a84e54cda0d5304f7c72b066afd
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05e363377b0c5548371d96debaaf6fff6fa705a7ca1321530510e181d3556b7e
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd95080fae073060aa659888b415d2135edd62af172653cfed0f99ab94464f6
3
+ size 395786922
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.6383607387542725,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.9771,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.4260591268539429,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.5006,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.6014289855957031,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.782,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 2.321310043334961,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.3546,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.937113881111145,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.2421,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.4271795749664307,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.005,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.9630257487297058,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.635,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.273303985595703,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.9471,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.193511724472046,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.6021,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.983036518096924,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6765,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.626178503036499,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.0259,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.8325883150100708,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5989,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.876636266708374,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8092,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 2.8380074501037598,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.5194,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.227553129196167,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6971,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.7015953063964844,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.6297,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.0312774181365967,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3067,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.7130300998687744,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.702,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.3681621849536896,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1352,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.8578549027442932,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.1746,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.2145777940750122,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.62,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 0.8366961479187012,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.237,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.8259220123291016,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.1652,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 2.451016426086426,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2307,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 7.296032905578613,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6684,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.7998406887054443,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.5629,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.9942561388015747,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.4002,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.880875587463379,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.5188,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 5.010158538818359,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.6704,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 6.04047155380249,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.8029,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 3.4900665283203125,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9319,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.4386566877365112,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.392,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.075533866882324,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.0757,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 5.302359104156494,
245
+ "learning_rate": 2e-05,
246
+ "loss": 1.017,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.4823062419891357,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1879,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.5839654803276062,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.1357,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 3.582629442214966,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.5566,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.8023282289505005,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.513,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.7795921564102173,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.8818,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.052372932434082,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.6126,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.606740117073059,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.5911,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.955881953239441,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6584,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.623976469039917,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.5529,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 2.0439250469207764,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.9415,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.6250275373458862,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.1703,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 2.0065391063690186,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.6163,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.4686758518218994,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1005,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.2346303462982178,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8502,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.8489227294921875,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.3295,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.55889892578125,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.634,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5692822418096128.0,
365
+ "train_loss": 0.71892333984375,
366
+ "train_runtime": 165.0653,
367
+ "train_samples_per_second": 2.423,
368
+ "train_steps_per_second": 0.606
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5692822418096128.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c033bed86fd20d1bf5f45a017a7235285089bde6b845487e1d24f22528cb78
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa0ede5f53ed6efb703e4de0c88d026168e8444ff6938342da4a8d4b540d5df
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c172229e49be46b6d28ec7d39d0dec151b6a3955e703aca7df6e3bab423f73b
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1781b39f8ac95a428751a6de041f2ef6a523a42fcaff84daefa48b7c5e687690
3
+ size 395787774
client_states_fedMultipqfullfreeze_homoAgg_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204e4484268b051e95249cd0b0d80d5fc1c8b309c758d33fca781ee24da8090f
3
+ size 395786922