thkim0305 commited on
Commit
a241385
·
verified ·
1 Parent(s): 08c4bfe

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
  2. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
  3. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
  4. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
  5. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
  6. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
  7. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
  8. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
  9. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json +392 -0
  10. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
  11. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
  12. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
  13. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
  14. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
  15. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
  16. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
  17. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
  18. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json +392 -0
  19. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
  20. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
  21. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
  22. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
  23. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
  24. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
  25. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
  26. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
  27. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json +392 -0
  28. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
  29. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
  30. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
  31. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
  32. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
  33. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
  34. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
  35. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
  36. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json +392 -0
  37. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
  38. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
  39. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
  40. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
  41. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
  42. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
  43. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
  44. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
  45. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json +392 -0
  46. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
  47. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
  48. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
  49. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
  50. client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601088f397eeb3909ce8e91cfb116fca6b84fc18d36265a3741a8a6c06aa0205
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972fef381446dc54bb81002f849cea881ea6b4efd9f7415f7c2c1373b9b5bd66
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf60673445bdd44111c8e6ab141a0477e485f5bd2e6214db12f6a1de7ac3366
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da479f80097fdef6726424fff54e48c3d5a490f482fdb9070c5d237a52e37e8c
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a41fb606f45f68ffb68b8c0c9c8f10c507bf08713651dd8823b5a65e17b4f6
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b79245c1084550d87dc88caeb54be1fb2882af094eafe31cfc3c2fc3d8ef9cb
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669c5d9f2adedf284327718bb5ef779557cb3689d3eb72257288b6ab968d6d4f
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04fa35c89561db794f4a8fcdf7083eef3e4aef17ee8943e20c2188189708ca6
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/0_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.3103408813476562,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.0898,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 3.532261371612549,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.535,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.486604928970337,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.4653,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 6.665052890777588,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5876,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.4172110557556152,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.5611,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.8320339918136597,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.088,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 4.306198596954346,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.498,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.505366802215576,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.2767,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.672132968902588,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.3158,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 3.8136868476867676,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8975,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 0.8911241888999939,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.1162,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 6.382510185241699,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.6136,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 5.372596740722656,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.363,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 4.140794277191162,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4008,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 8.768181800842285,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.9195,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 3.8078761100769043,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.3291,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 6.264554023742676,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.767,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 11.220328330993652,
133
+ "learning_rate": 2e-05,
134
+ "loss": 1.473,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.9702634811401367,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.3766,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 2.120680332183838,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.3277,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 6.803924560546875,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.4957,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 2.5803332328796387,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.1319,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 9.29668140411377,
168
+ "learning_rate": 2e-05,
169
+ "loss": 1.9043,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 3.864070415496826,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.515,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.14567677676677704,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.3086,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.552821159362793,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.693,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 7.949587345123291,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.5568,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 1.4345118999481201,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.617,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.910844802856445,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1581,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.9731581211090088,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3823,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 10.148768424987793,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.3181,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 4.756891250610352,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.446,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.3829259872436523,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.1589,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.3166489601135254,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.8197,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 1.1867741346359253,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.0674,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 11.073589324951172,
259
+ "learning_rate": 2e-05,
260
+ "loss": 2.0834,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.9917004108428955,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.7352,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.03345826640725136,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.1388,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 1.6753065586090088,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.1295,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.0468225479125977,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2035,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.6748114228248596,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.074,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 2.4343173503875732,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.1473,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.3457399904727936,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0603,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.3236846923828125,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3552,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.8542829751968384,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0992,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.0302435159683228,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.1318,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 6.573387622833252,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.1148,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 7.864552974700928,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.3684,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.19719119369983673,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.301,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 0.3533047139644623,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.0359,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2053446886752256.0,
365
+ "train_loss": 0.5710413241386414,
366
+ "train_runtime": 100.1311,
367
+ "train_samples_per_second": 3.995,
368
+ "train_steps_per_second": 0.999
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2053446886752256.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515801884b96a34fbe54560143329ce06d512f7bc9f748e41058588403e77c22
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bbc94f43e777c70d6db0593944717c276f0797216f959f3afc34de5433ab17d
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33db7dfc952f99bdfd6f5ca79752d67f0bd782aedc6f71bc47157491890465b
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152bd53476880a0673ea49289cc36843973ad9b0b0f54cf45bf1fae800f555c6
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61bb6099a0787cce3325f6a28e587efc290398fb146f4d2404e4e1037d5e73a
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c86fea7f6a5f529d490af7ffa0540adb01ce00513d4cf95cdbb2fd4bcfc187f
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0440ab6c1705f26b96e4fb57a69009a2be64bf2be9750ec3be57311aafa7860
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98ba3961e9695875a3f1f66ffaf6fe5b528a8f0031ab86107cf824c6c890770f
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/1_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.053181171417236,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.3885,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 0.0796952024102211,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0052,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 3.2136785984039307,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.2577,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 0.09704623371362686,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.0057,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 0.2581525146961212,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.0124,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 0.7397000789642334,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.0302,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.1551579236984253,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.0662,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 3.3530216217041016,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4311,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 0.0396430566906929,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.0086,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 0.048732295632362366,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.0033,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.054931640625,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.0521,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.3746412992477417,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.0877,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.6224493980407715,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.3346,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 0.21258927881717682,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.0154,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 0.0053559038788080215,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5832,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 2.0714592933654785,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.1197,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 0.034640390425920486,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.0017,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 0.035930756479501724,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.0035,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 3.9088540077209473,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1674,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.10309179872274399,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0052,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 0.11337645351886749,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.0074,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 0.006353128235787153,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.006,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 0.11211053282022476,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.0061,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.7047258615493774,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.0202,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 0.10752872377634048,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.0068,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 11.707930564880371,
189
+ "learning_rate": 2e-05,
190
+ "loss": 1.1686,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.2754460573196411,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.0411,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 0.32534557580947876,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.0346,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 0.7839933633804321,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.169,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 0.016284512355923653,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.0085,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 0.01285564061254263,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.0037,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 0.017023563385009766,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.0031,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 0.5894005298614502,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.0344,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 2.5769262313842773,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2399,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.11012467741966248,
252
+ "learning_rate": 2e-05,
253
+ "loss": 0.0076,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.2998049855232239,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.0185,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 0.9940203428268433,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.0655,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 0.38424286246299744,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.0414,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.009840243496000767,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.0233,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.0006893413374200463,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.401,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.08065320551395416,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.0393,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.3744912147521973,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.0766,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 0.011814103461802006,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.0015,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.8807648420333862,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.0752,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.019948307424783707,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.0012,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 0.02643703483045101,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.0024,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.035402942448854446,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0249,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 0.0117949815467,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.0014,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 0.026442598551511765,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.0019,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 8.142333030700684,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.2835,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2069576665792512.0,
365
+ "train_loss": 0.10788208454847335,
366
+ "train_runtime": 99.8712,
367
+ "train_samples_per_second": 4.005,
368
+ "train_steps_per_second": 1.001
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2069576665792512.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f78b25bab6835f62f9822e5b6287e857a1b5ec83852ec2504feee15bba61bae
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0899b6fad8fcee471b664ea79e2ee9761b6fd2ee15b275fc97bc89a39f366260
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09828ae718f740f9789b0aad26812e31e7bfbaa7d1345bbe0d6f2133273618ae
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e699f47be1f086e80f5927ab87c970e415fc2249dacbe477de24b2d4b4f290
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98076d1a485dee413d0a3058c25ae735c0c60b05929dabc07edc8b07230a699d
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc202d5d7a13afe84562def5972aa2b53b55494013e0c796ee940e04ac06411
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e35590c3355bd5576bfdc1cdf900c8965358057209a964522dbc96cc45bdfd
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e88943196a7a63864f254c5584ef10c4ebe5eb4a197c6fe2c426104db1c90fa
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/2_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 6.272475242614746,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.4564,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.9333751201629639,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.2648,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 8.752674102783203,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.0459,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 6.856117248535156,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.074,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 1.8297804594039917,
42
+ "learning_rate": 2e-05,
43
+ "loss": 0.2858,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 1.140818476676941,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.3342,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 1.5394253730773926,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.5499,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 0.8623746633529663,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.5739,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.4835646152496338,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5788,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 5.786844253540039,
77
+ "learning_rate": 2e-05,
78
+ "loss": 1.1156,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.184555172920227,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.393,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.456709146499634,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5986,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 2.970411539077759,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.9743,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 3.235511064529419,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.5695,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 1.388797640800476,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.3121,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 0.7918416857719421,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.4166,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 5.021376132965088,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.0664,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.587064266204834,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.4099,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 2.4496819972991943,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.7411,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.2078651487827301,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.0655,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 4.392916202545166,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.6521,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 1.40581476688385,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.6168,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 3.039031744003296,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9925,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 0.7090831995010376,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.1715,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 1.347177267074585,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.2748,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 1.3329389095306396,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.1272,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 0.08257925510406494,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.2341,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 4.227807998657227,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.6303,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 4.126163959503174,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1953,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 4.300910472869873,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.3218,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 1.9219881296157837,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.1746,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 2.9596614837646484,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4584,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.6123971939086914,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.208,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.38876235485076904,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.2364,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 0.2788524329662323,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.1004,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 0.8127436637878418,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.4335,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 1.1976258754730225,
266
+ "learning_rate": 2e-05,
267
+ "loss": 0.3541,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 2.5443003177642822,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.5296,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 7.851327419281006,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.6387,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 0.6018063426017761,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.2443,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 0.8378749489784241,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.3417,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.4673182964324951,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.1931,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.720952033996582,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4961,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 0.9158133268356323,
315
+ "learning_rate": 2e-05,
316
+ "loss": 0.3774,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 3.9416582584381104,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.3777,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 4.797536849975586,
329
+ "learning_rate": 2e-05,
330
+ "loss": 2.2166,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 0.18337757885456085,
336
+ "learning_rate": 2e-05,
337
+ "loss": 0.0285,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.1316816806793213,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.2311,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 2.1029789447784424,
350
+ "learning_rate": 2e-05,
351
+ "loss": 0.2891,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 5.11422061920166,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.8276,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 4915713315700736.0,
365
+ "train_loss": 0.5959944343566894,
366
+ "train_runtime": 164.7489,
367
+ "train_samples_per_second": 2.428,
368
+ "train_steps_per_second": 0.607
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 4915713315700736.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b90a5dd0fd661e09a67c5ff5ce2e4e6b7f27bce3cc8819b4a12d07ed2e923d0
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5707c613f0ab26a54511a0e422c567178a4e9646a71cb5ea16ca021a852528f
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128ef94d7638cf3e09c9ad1624721e707d92df02f600fe7d15049c93dfba2d44
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd25ea69cc102dc88a95c6e544b5ccb62a87a364e695ecc06a48c0d829d2107
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8fcd31d481950ba1813d170fae25768537f117f828c5e6493d281d312ec2ff0
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:982149de6ff717c45e71c05f899d16246f64df8b457c4d0255e66955582870d6
3
+ size 184221358
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:856d2908d444479fef018729000b9d0427d423ef37e51aaf9c7879b8975afedd
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91202957aa3e8f9c37e874e055083b17873ffbbb16fb5e7ed3f6eb86c0a00099
3
+ size 184220842
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/3_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 1.8781095743179321,
14
+ "learning_rate": 2e-05,
15
+ "loss": 0.7201,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 4.64780330657959,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.7212,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 4.34021520614624,
28
+ "learning_rate": 2e-05,
29
+ "loss": 1.1729,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.4610328674316406,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.5734,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 6.560781478881836,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.0993,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 4.692245960235596,
49
+ "learning_rate": 2e-05,
50
+ "loss": 0.8895,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 3.2022180557250977,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.6035,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 9.959211349487305,
63
+ "learning_rate": 2e-05,
64
+ "loss": 0.4861,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 4.851236343383789,
70
+ "learning_rate": 2e-05,
71
+ "loss": 1.23,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.00787353515625,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.8202,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 7.79062557220459,
84
+ "learning_rate": 2e-05,
85
+ "loss": 1.9206,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 2.094217538833618,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.26,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 7.391254901885986,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8843,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 5.2949700355529785,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.4376,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 3.2532601356506348,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.5991,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 1.2858636379241943,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.5054,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.041250705718994,
126
+ "learning_rate": 2e-05,
127
+ "loss": 0.9352,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 3.889521598815918,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.5882,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 9.420170783996582,
140
+ "learning_rate": 2e-05,
141
+ "loss": 1.4432,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 16.8153133392334,
147
+ "learning_rate": 2e-05,
148
+ "loss": 1.3346,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 7.50994873046875,
154
+ "learning_rate": 2e-05,
155
+ "loss": 1.0825,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 8.1231107711792,
161
+ "learning_rate": 2e-05,
162
+ "loss": 1.4161,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 8.429718017578125,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.49,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 3.6227402687072754,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.9992,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 5.889152526855469,
182
+ "learning_rate": 2e-05,
183
+ "loss": 1.4035,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 4.411243915557861,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.8505,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 7.3300628662109375,
196
+ "learning_rate": 2e-05,
197
+ "loss": 1.5046,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 10.950540542602539,
203
+ "learning_rate": 2e-05,
204
+ "loss": 2.0556,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 6.158329963684082,
210
+ "learning_rate": 2e-05,
211
+ "loss": 1.1124,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 3.026836395263672,
217
+ "learning_rate": 2e-05,
218
+ "loss": 0.6212,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 7.47479248046875,
224
+ "learning_rate": 2e-05,
225
+ "loss": 1.7262,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 4.971011161804199,
231
+ "learning_rate": 2e-05,
232
+ "loss": 1.1455,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 3.6353936195373535,
238
+ "learning_rate": 2e-05,
239
+ "loss": 0.9557,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 0.9492896199226379,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.741,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 14.06840705871582,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.9984,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 2.856049060821533,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.5109,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 2.644953966140747,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.8948,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 4.469645977020264,
273
+ "learning_rate": 2e-05,
274
+ "loss": 1.3262,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 2.310840368270874,
280
+ "learning_rate": 2e-05,
281
+ "loss": 1.0936,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.9231162071228027,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.9224,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 7.485569477081299,
294
+ "learning_rate": 2e-05,
295
+ "loss": 1.5591,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 3.495986223220825,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.3848,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 8.537229537963867,
308
+ "learning_rate": 2e-05,
309
+ "loss": 1.1093,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 7.835823059082031,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.5268,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 7.483861446380615,
322
+ "learning_rate": 2e-05,
323
+ "loss": 1.4335,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 6.753383636474609,
329
+ "learning_rate": 2e-05,
330
+ "loss": 1.2968,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 5.714827537536621,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.7101,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.2157304286956787,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.8783,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 5.616714954376221,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.445,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 7.787622451782227,
357
+ "learning_rate": 2e-05,
358
+ "loss": 1.4313,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 2098644891205632.0,
365
+ "train_loss": 1.0769911789894104,
366
+ "train_runtime": 99.8148,
367
+ "train_samples_per_second": 4.007,
368
+ "train_steps_per_second": 1.002
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 2098644891205632.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc04153b001e017d209e57e4f7e110ceffdcf8233d0d3c3af57e0f2fe48da24a
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d713770573d500be237293e72b94a1cc8dddbc8d4c7665b4020e3ff56dacf989
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d91972bff7e58989a0015e2c3ec3b73758bc6aa261e4de87fcbb0af0d536f409
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add14524942e24d06b6fa87becf4438589ced02a70a0bb53df810aa4ac3da857
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf1edf6d867ba7dd0fece4032f4c607a03cd3e354e59a4e213e6d2131f6f799
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round20.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c2b00b249d1803956e5e975f2f439efc91691d91b31331ad305f184953e805
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39d888f201ae7da27f5342683ce24f06ab6863474eaa5a4ee43ffacd50993b2
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_client_model_round7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c616640cd02faeca67fb9ad8c43c9ff3378582a127b880526740e422052d27b
3
+ size 395786922
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/4_trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 3.8667259216308594,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.0569,
16
+ "step": 2
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 1.0802192687988281,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.4849,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 1.5820523500442505,
28
+ "learning_rate": 2e-05,
29
+ "loss": 0.8122,
30
+ "step": 6
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 3.165304183959961,
35
+ "learning_rate": 2e-05,
36
+ "loss": 0.4609,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 2.1040215492248535,
42
+ "learning_rate": 2e-05,
43
+ "loss": 1.3716,
44
+ "step": 10
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 2.737826347351074,
49
+ "learning_rate": 2e-05,
50
+ "loss": 1.1401,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 0.948131263256073,
56
+ "learning_rate": 2e-05,
57
+ "loss": 0.632,
58
+ "step": 14
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 2.155320405960083,
63
+ "learning_rate": 2e-05,
64
+ "loss": 1.0287,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 1.2282243967056274,
70
+ "learning_rate": 2e-05,
71
+ "loss": 0.5867,
72
+ "step": 18
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 2.5964159965515137,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.6555,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 0.22,
83
+ "grad_norm": 1.4527921676635742,
84
+ "learning_rate": 2e-05,
85
+ "loss": 0.9789,
86
+ "step": 22
87
+ },
88
+ {
89
+ "epoch": 0.24,
90
+ "grad_norm": 1.5666605234146118,
91
+ "learning_rate": 2e-05,
92
+ "loss": 0.5485,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 0.26,
97
+ "grad_norm": 3.837740182876587,
98
+ "learning_rate": 2e-05,
99
+ "loss": 0.8414,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 0.28,
104
+ "grad_norm": 1.679490566253662,
105
+ "learning_rate": 2e-05,
106
+ "loss": 0.6234,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 0.3,
111
+ "grad_norm": 2.4019229412078857,
112
+ "learning_rate": 2e-05,
113
+ "loss": 0.6795,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 0.32,
118
+ "grad_norm": 3.0277788639068604,
119
+ "learning_rate": 2e-05,
120
+ "loss": 0.6551,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 3.1049275398254395,
126
+ "learning_rate": 2e-05,
127
+ "loss": 1.3996,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 0.36,
132
+ "grad_norm": 2.1981680393218994,
133
+ "learning_rate": 2e-05,
134
+ "loss": 0.6906,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 0.38,
139
+ "grad_norm": 0.4658830463886261,
140
+ "learning_rate": 2e-05,
141
+ "loss": 0.1708,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 0.4,
146
+ "grad_norm": 0.9990465044975281,
147
+ "learning_rate": 2e-05,
148
+ "loss": 0.2357,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 0.42,
153
+ "grad_norm": 1.218131184577942,
154
+ "learning_rate": 2e-05,
155
+ "loss": 0.604,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 0.44,
160
+ "grad_norm": 0.8305265307426453,
161
+ "learning_rate": 2e-05,
162
+ "loss": 0.263,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 0.46,
167
+ "grad_norm": 4.220275402069092,
168
+ "learning_rate": 2e-05,
169
+ "loss": 0.9145,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 0.48,
174
+ "grad_norm": 3.2627382278442383,
175
+ "learning_rate": 2e-05,
176
+ "loss": 0.2621,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "grad_norm": 7.637389183044434,
182
+ "learning_rate": 2e-05,
183
+ "loss": 0.6855,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 0.52,
188
+ "grad_norm": 2.2909798622131348,
189
+ "learning_rate": 2e-05,
190
+ "loss": 0.4636,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "grad_norm": 1.6831642389297485,
196
+ "learning_rate": 2e-05,
197
+ "loss": 0.3304,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 0.56,
202
+ "grad_norm": 3.669160842895508,
203
+ "learning_rate": 2e-05,
204
+ "loss": 0.3234,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "grad_norm": 9.243528366088867,
210
+ "learning_rate": 2e-05,
211
+ "loss": 0.9173,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 0.6,
216
+ "grad_norm": 7.592850685119629,
217
+ "learning_rate": 2e-05,
218
+ "loss": 1.0405,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "grad_norm": 4.612028121948242,
224
+ "learning_rate": 2e-05,
225
+ "loss": 0.9296,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 0.64,
230
+ "grad_norm": 1.7067667245864868,
231
+ "learning_rate": 2e-05,
232
+ "loss": 0.4996,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "grad_norm": 2.345529079437256,
238
+ "learning_rate": 2e-05,
239
+ "loss": 1.2364,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 0.68,
244
+ "grad_norm": 4.779994487762451,
245
+ "learning_rate": 2e-05,
246
+ "loss": 0.9,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "grad_norm": 3.520698070526123,
252
+ "learning_rate": 2e-05,
253
+ "loss": 1.274,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "grad_norm": 1.043005108833313,
259
+ "learning_rate": 2e-05,
260
+ "loss": 0.1549,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "grad_norm": 4.629940509796143,
266
+ "learning_rate": 2e-05,
267
+ "loss": 1.5627,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 0.76,
272
+ "grad_norm": 1.3093677759170532,
273
+ "learning_rate": 2e-05,
274
+ "loss": 0.5079,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "grad_norm": 0.6198597550392151,
280
+ "learning_rate": 2e-05,
281
+ "loss": 0.9565,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 0.8,
286
+ "grad_norm": 2.102959156036377,
287
+ "learning_rate": 2e-05,
288
+ "loss": 0.4706,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.82,
293
+ "grad_norm": 1.7533611059188843,
294
+ "learning_rate": 2e-05,
295
+ "loss": 0.6437,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "grad_norm": 1.9639290571212769,
301
+ "learning_rate": 2e-05,
302
+ "loss": 0.6471,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 0.86,
307
+ "grad_norm": 2.027463912963867,
308
+ "learning_rate": 2e-05,
309
+ "loss": 0.4873,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 0.88,
314
+ "grad_norm": 2.5160040855407715,
315
+ "learning_rate": 2e-05,
316
+ "loss": 1.1644,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 0.9,
321
+ "grad_norm": 0.7003493905067444,
322
+ "learning_rate": 2e-05,
323
+ "loss": 0.2009,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 0.92,
328
+ "grad_norm": 1.787532925605774,
329
+ "learning_rate": 2e-05,
330
+ "loss": 0.6372,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 0.94,
335
+ "grad_norm": 3.888639211654663,
336
+ "learning_rate": 2e-05,
337
+ "loss": 1.2432,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "grad_norm": 2.0158474445343018,
343
+ "learning_rate": 2e-05,
344
+ "loss": 0.7993,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 0.98,
349
+ "grad_norm": 3.9803707599639893,
350
+ "learning_rate": 2e-05,
351
+ "loss": 1.2802,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 1.0,
356
+ "grad_norm": 2.657550573348999,
357
+ "learning_rate": 2e-05,
358
+ "loss": 0.7877,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "step": 100,
364
+ "total_flos": 5692822418096128.0,
365
+ "train_loss": 0.7448138999938965,
366
+ "train_runtime": 164.7682,
367
+ "train_samples_per_second": 2.428,
368
+ "train_steps_per_second": 0.607
369
+ }
370
+ ],
371
+ "logging_steps": 2,
372
+ "max_steps": 100,
373
+ "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 1,
375
+ "save_steps": 500,
376
+ "stateful_callbacks": {
377
+ "TrainerControl": {
378
+ "args": {
379
+ "should_epoch_stop": false,
380
+ "should_evaluate": false,
381
+ "should_log": false,
382
+ "should_save": false,
383
+ "should_training_stop": false
384
+ },
385
+ "attributes": {}
386
+ }
387
+ },
388
+ "total_flos": 5692822418096128.0,
389
+ "train_batch_size": 1,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10b6891ffb0d7ee2bbc7d4c5e22e7d3914155f461e036c14c64145d94b777bf
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7284e5b745babed407499e559af061cd69c87a12563b08b6075b15e2c942b8fc
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84518474151c39c0ae7a01ab9e58001c8b59b1b4a6e02b12d1588ff9138cfd4
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round17.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba1d8705800a9de15220cdaab6edd725444a970ab5b0c3958e08f93f6b170d4d
3
+ size 395787774
client_states_fedMultipqfullfreeze_sft_NOCONT_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr100_T0125_decay099_SEED2/5_client_model_round2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8924b694f6781834a72230a07694dc5273317d364888d728ebeb1bc0830255d
3
+ size 395786922