Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete change set.
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json +392 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth +3 -0
- client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth +3 -0
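The 50-file limit cuts the listing short; the remaining per-client checkpoints and trainer states appear only in the raw diff. For reference, a folder like this one is typically pushed with HfApi.upload_folder from huggingface_hub. A minimal sketch, not the exact call used for this commit; repo_id is a placeholder:

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099",
    repo_id="your-username/your-repo",  # placeholder, not the actual repository id
    repo_type="model",                  # assumption; this could also be a dataset repo
)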
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9391214ff1923b83752a8d3951b5d3401a5ec6d0246781854e0201f97f9f82b3
+size 302202238
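Note that each .pth entry in this diff is only a 3-line Git LFS pointer (version, oid, size); the checkpoint tensors themselves live in LFS storage. A minimal sketch of fetching the real object and loading it, assuming the files are ordinary torch.save checkpoints; repo_id is a placeholder:

from huggingface_hub import hf_hub_download
import torch

ckpt_path = hf_hub_download(
    repo_id="your-username/your-repo",  # placeholder
    filename="client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round10.pth",
)
state = torch.load(ckpt_path, map_location="cpu")  # download resolves the pointer to the real file
print(type(state))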
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8ff8dd214a728fe9583f1d2f40c92851b977c13963f43257431bd0afcbbad2c
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bfe57805c7c5080f34d0a9b1d11f82f05e58fdce3169d5d4cab80a6b73d0ae
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:616a2c97b60a139f9cc9ddbc270586849cc31afc3bcce85ae5087b5fe4964eb3
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4882a2e3ee217c356fc14775ece3eb957872f6040ed76f9dd5fdf02601715dd
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3cbd61ffa32e5a09cab1b8df46c660a822400be159e93bb759a75e0c8897905
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbf69e6bab55d931c75b4629ca7f94f6d85e67a577f60d56d3ad0356e939e732
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f01537df585cceb4f79ff0535872d97637653cd1913821410649382c6045014b
+size 302201386
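The oid and size recorded in each pointer can be used to verify a downloaded checkpoint. A minimal sketch using the values from the 0_client_model_round7.pth entry above; the local path is an assumption:

import hashlib
import os

path = "0_client_model_round7.pth"  # assumed local copy of the resolved LFS object
expected_oid = "f01537df585cceb4f79ff0535872d97637653cd1913821410649382c6045014b"
expected_size = 302201386

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"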
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 6.59199857711792, "learning_rate": 2e-05, "loss": 0.5286, "step": 2},
+    {"epoch": 0.04, "grad_norm": 4.61942720413208, "learning_rate": 2e-05, "loss": 0.97, "step": 4},
+    {"epoch": 0.06, "grad_norm": 4.521786212921143, "learning_rate": 2e-05, "loss": 0.3505, "step": 6},
+    {"epoch": 0.08, "grad_norm": 0.39112570881843567, "learning_rate": 2e-05, "loss": 0.115, "step": 8},
+    {"epoch": 0.1, "grad_norm": 3.506913900375366, "learning_rate": 2e-05, "loss": 0.669, "step": 10},
+    {"epoch": 0.12, "grad_norm": 7.119588375091553, "learning_rate": 2e-05, "loss": 0.6728, "step": 12},
+    {"epoch": 0.14, "grad_norm": 4.159899711608887, "learning_rate": 2e-05, "loss": 0.4872, "step": 14},
+    {"epoch": 0.16, "grad_norm": 5.473998069763184, "learning_rate": 2e-05, "loss": 1.3448, "step": 16},
+    {"epoch": 0.18, "grad_norm": 9.59962272644043, "learning_rate": 2e-05, "loss": 1.1658, "step": 18},
+    {"epoch": 0.2, "grad_norm": 5.037639617919922, "learning_rate": 2e-05, "loss": 0.437, "step": 20},
+    {"epoch": 0.22, "grad_norm": 3.210973024368286, "learning_rate": 2e-05, "loss": 0.2022, "step": 22},
+    {"epoch": 0.24, "grad_norm": 3.4891958236694336, "learning_rate": 2e-05, "loss": 0.3925, "step": 24},
+    {"epoch": 0.26, "grad_norm": 2.502493381500244, "learning_rate": 2e-05, "loss": 0.5882, "step": 26},
+    {"epoch": 0.28, "grad_norm": 0.41307923197746277, "learning_rate": 2e-05, "loss": 0.6127, "step": 28},
+    {"epoch": 0.3, "grad_norm": 2.3616442680358887, "learning_rate": 2e-05, "loss": 0.6964, "step": 30},
+    {"epoch": 0.32, "grad_norm": 5.551742076873779, "learning_rate": 2e-05, "loss": 0.2612, "step": 32},
+    {"epoch": 0.34, "grad_norm": 4.289650917053223, "learning_rate": 2e-05, "loss": 0.3091, "step": 34},
+    {"epoch": 0.36, "grad_norm": 13.974275588989258, "learning_rate": 2e-05, "loss": 0.8659, "step": 36},
+    {"epoch": 0.38, "grad_norm": 2.4527721405029297, "learning_rate": 2e-05, "loss": 0.7772, "step": 38},
+    {"epoch": 0.4, "grad_norm": 2.5927505493164062, "learning_rate": 2e-05, "loss": 0.162, "step": 40},
+    {"epoch": 0.42, "grad_norm": 0.6561378836631775, "learning_rate": 2e-05, "loss": 0.0605, "step": 42},
+    {"epoch": 0.44, "grad_norm": 4.918743133544922, "learning_rate": 2e-05, "loss": 0.7217, "step": 44},
+    {"epoch": 0.46, "grad_norm": 0.39737483859062195, "learning_rate": 2e-05, "loss": 0.1857, "step": 46},
+    {"epoch": 0.48, "grad_norm": 14.40439224243164, "learning_rate": 2e-05, "loss": 1.4849, "step": 48},
+    {"epoch": 0.5, "grad_norm": 0.6556407809257507, "learning_rate": 2e-05, "loss": 0.1272, "step": 50},
+    {"epoch": 0.52, "grad_norm": 9.867362976074219, "learning_rate": 2e-05, "loss": 0.4252, "step": 52},
+    {"epoch": 0.54, "grad_norm": 2.099479913711548, "learning_rate": 2e-05, "loss": 0.0928, "step": 54},
+    {"epoch": 0.56, "grad_norm": 4.53439474105835, "learning_rate": 2e-05, "loss": 0.3139, "step": 56},
+    {"epoch": 0.58, "grad_norm": 7.238653659820557, "learning_rate": 2e-05, "loss": 1.6104, "step": 58},
+    {"epoch": 0.6, "grad_norm": 1.539478063583374, "learning_rate": 2e-05, "loss": 0.1581, "step": 60},
+    {"epoch": 0.62, "grad_norm": 1.8394383192062378, "learning_rate": 2e-05, "loss": 0.4265, "step": 62},
+    {"epoch": 0.64, "grad_norm": 0.7109262943267822, "learning_rate": 2e-05, "loss": 0.1265, "step": 64},
+    {"epoch": 0.66, "grad_norm": 1.0773893594741821, "learning_rate": 2e-05, "loss": 0.2364, "step": 66},
+    {"epoch": 0.68, "grad_norm": 3.935823917388916, "learning_rate": 2e-05, "loss": 1.0373, "step": 68},
+    {"epoch": 0.7, "grad_norm": 7.714654922485352, "learning_rate": 2e-05, "loss": 0.6007, "step": 70},
+    {"epoch": 0.72, "grad_norm": 8.1398286819458, "learning_rate": 2e-05, "loss": 0.8645, "step": 72},
+    {"epoch": 0.74, "grad_norm": 7.816657066345215, "learning_rate": 2e-05, "loss": 2.3697, "step": 74},
+    {"epoch": 0.76, "grad_norm": 7.676031589508057, "learning_rate": 2e-05, "loss": 0.59, "step": 76},
+    {"epoch": 0.78, "grad_norm": 1.7220346927642822, "learning_rate": 2e-05, "loss": 0.9622, "step": 78},
+    {"epoch": 0.8, "grad_norm": 3.681994915008545, "learning_rate": 2e-05, "loss": 0.4974, "step": 80},
+    {"epoch": 0.82, "grad_norm": 7.639248847961426, "learning_rate": 2e-05, "loss": 0.9375, "step": 82},
+    {"epoch": 0.84, "grad_norm": 9.04971694946289, "learning_rate": 2e-05, "loss": 1.22, "step": 84},
+    {"epoch": 0.86, "grad_norm": 1.4018713235855103, "learning_rate": 2e-05, "loss": 0.0818, "step": 86},
+    {"epoch": 0.88, "grad_norm": 4.88040828704834, "learning_rate": 2e-05, "loss": 0.7108, "step": 88},
+    {"epoch": 0.9, "grad_norm": 2.9837918281555176, "learning_rate": 2e-05, "loss": 0.3657, "step": 90},
+    {"epoch": 0.92, "grad_norm": 1.0587419271469116, "learning_rate": 2e-05, "loss": 0.3112, "step": 92},
+    {"epoch": 0.94, "grad_norm": 9.619867324829102, "learning_rate": 2e-05, "loss": 2.6249, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.8182443380355835, "learning_rate": 2e-05, "loss": 1.052, "step": 96},
+    {"epoch": 0.98, "grad_norm": 5.4298481941223145, "learning_rate": 2e-05, "loss": 0.983, "step": 98},
+    {"epoch": 1.0, "grad_norm": 1.9551576375961304, "learning_rate": 2e-05, "loss": 0.3735, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 2722175404474368.0, "train_loss": 0.6631992340087891, "train_runtime": 215.6578, "train_samples_per_second": 1.855, "train_steps_per_second": 0.464}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 2722175404474368.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
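The trainer_state.json above follows the Hugging Face Trainer state layout: log_history holds one entry per logging step (epoch, grad_norm, learning_rate, loss, step) plus a final summary entry with train_loss and runtime statistics. A minimal sketch of summarizing one client's state, assuming a local copy of the file:

import json

with open("0_trainer_state.json") as f:
    state = json.load(f)

losses = [entry["loss"] for entry in state["log_history"] if "loss" in entry]
summary = state["log_history"][-1]
print(len(losses), "logged steps")
print("mean logged loss:", sum(losses) / len(losses))
print("reported train_loss:", summary.get("train_loss"))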
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88cec14609ad32c290c19fe99cc2626c1573cbc85f114434a313145999b97e84
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df187ae799f263db8566f5385dc79e9c2623b1d6957e1928f94a77e61051ea6c
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ae4134c8cf9bd49bd9e9d46343957346c41793b1be9d466e80cfecfcea6f24a
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5289d21311312be0fde45820e0a508ba5cbfd66bb88d64b7ec96bbf4065de637
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1da2662d2df70b5cf7a9cbac9949d8be681b8e616a3e9f8bd8f573d3ae40e3
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf4c5a1502c71922c0c95fafe0bced2fd2386a6dcb782663f81d80fe38eae27
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11cabf8f804c392f7578109c0fc6aeb86b7f26cd200fd331d5756dd441870339
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:058f762da3f1f0d383fcdfdbaa9c44ff0a75249cec5d1d061055f5c8bc46789d
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/1_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 0.849377453327179, "learning_rate": 2e-05, "loss": 0.0279, "step": 2},
+    {"epoch": 0.04, "grad_norm": 0.02175198495388031, "learning_rate": 2e-05, "loss": 0.0578, "step": 4},
+    {"epoch": 0.06, "grad_norm": 0.5898083448410034, "learning_rate": 2e-05, "loss": 0.0414, "step": 6},
+    {"epoch": 0.08, "grad_norm": 0.41922903060913086, "learning_rate": 2e-05, "loss": 0.0256, "step": 8},
+    {"epoch": 0.1, "grad_norm": 0.0018654951127246022, "learning_rate": 2e-05, "loss": 0.0004, "step": 10},
+    {"epoch": 0.12, "grad_norm": 0.004117058124393225, "learning_rate": 2e-05, "loss": 0.1077, "step": 12},
+    {"epoch": 0.14, "grad_norm": 0.047437746077775955, "learning_rate": 2e-05, "loss": 0.0009, "step": 14},
+    {"epoch": 0.16, "grad_norm": 1.518896460533142, "learning_rate": 2e-05, "loss": 0.349, "step": 16},
+    {"epoch": 0.18, "grad_norm": 0.045308228582143784, "learning_rate": 2e-05, "loss": 0.0889, "step": 18},
+    {"epoch": 0.2, "grad_norm": 0.11829675734043121, "learning_rate": 2e-05, "loss": 0.0026, "step": 20},
+    {"epoch": 0.22, "grad_norm": 0.005238677840679884, "learning_rate": 2e-05, "loss": 0.4929, "step": 22},
+    {"epoch": 0.24, "grad_norm": 0.2110355943441391, "learning_rate": 2e-05, "loss": 0.008, "step": 24},
+    {"epoch": 0.26, "grad_norm": 1.4381576776504517, "learning_rate": 2e-05, "loss": 0.0896, "step": 26},
+    {"epoch": 0.28, "grad_norm": 16.31382942199707, "learning_rate": 2e-05, "loss": 0.2398, "step": 28},
+    {"epoch": 0.3, "grad_norm": 0.05612191930413246, "learning_rate": 2e-05, "loss": 0.0021, "step": 30},
+    {"epoch": 0.32, "grad_norm": 0.22438876330852509, "learning_rate": 2e-05, "loss": 0.0076, "step": 32},
+    {"epoch": 0.34, "grad_norm": 1.866287350654602, "learning_rate": 2e-05, "loss": 0.0595, "step": 34},
+    {"epoch": 0.36, "grad_norm": 0.045663584023714066, "learning_rate": 2e-05, "loss": 0.0009, "step": 36},
+    {"epoch": 0.38, "grad_norm": 0.2734965682029724, "learning_rate": 2e-05, "loss": 0.0132, "step": 38},
+    {"epoch": 0.4, "grad_norm": 0.5363508462905884, "learning_rate": 2e-05, "loss": 0.0329, "step": 40},
+    {"epoch": 0.42, "grad_norm": 0.382310688495636, "learning_rate": 2e-05, "loss": 0.0192, "step": 42},
+    {"epoch": 0.44, "grad_norm": 8.9566011428833, "learning_rate": 2e-05, "loss": 0.3838, "step": 44},
+    {"epoch": 0.46, "grad_norm": 1.7171825170516968, "learning_rate": 2e-05, "loss": 0.0176, "step": 46},
+    {"epoch": 0.48, "grad_norm": 1.6939023733139038, "learning_rate": 2e-05, "loss": 0.0231, "step": 48},
+    {"epoch": 0.5, "grad_norm": 0.0025089113041758537, "learning_rate": 2e-05, "loss": 0.0026, "step": 50},
+    {"epoch": 0.52, "grad_norm": 0.06536436825990677, "learning_rate": 2e-05, "loss": 0.2801, "step": 52},
+    {"epoch": 0.54, "grad_norm": 0.20245826244354248, "learning_rate": 2e-05, "loss": 0.0073, "step": 54},
+    {"epoch": 0.56, "grad_norm": 0.018265284597873688, "learning_rate": 2e-05, "loss": 0.0028, "step": 56},
+    {"epoch": 0.58, "grad_norm": 0.13041305541992188, "learning_rate": 2e-05, "loss": 0.1354, "step": 58},
+    {"epoch": 0.6, "grad_norm": 0.010534190572798252, "learning_rate": 2e-05, "loss": 1.0962, "step": 60},
+    {"epoch": 0.62, "grad_norm": 0.2692118287086487, "learning_rate": 2e-05, "loss": 0.0085, "step": 62},
+    {"epoch": 0.64, "grad_norm": 0.038933683186769485, "learning_rate": 2e-05, "loss": 0.0019, "step": 64},
+    {"epoch": 0.66, "grad_norm": 0.018930355086922646, "learning_rate": 2e-05, "loss": 0.0017, "step": 66},
+    {"epoch": 0.68, "grad_norm": 0.7976124286651611, "learning_rate": 2e-05, "loss": 0.0334, "step": 68},
+    {"epoch": 0.7, "grad_norm": 0.2943456470966339, "learning_rate": 2e-05, "loss": 0.0089, "step": 70},
+    {"epoch": 0.72, "grad_norm": 0.12224985659122467, "learning_rate": 2e-05, "loss": 0.0067, "step": 72},
+    {"epoch": 0.74, "grad_norm": 0.3579569458961487, "learning_rate": 2e-05, "loss": 0.0677, "step": 74},
+    {"epoch": 0.76, "grad_norm": 0.02683216519653797, "learning_rate": 2e-05, "loss": 0.0016, "step": 76},
+    {"epoch": 0.78, "grad_norm": 3.3340158462524414, "learning_rate": 2e-05, "loss": 0.2447, "step": 78},
+    {"epoch": 0.8, "grad_norm": 0.009762264788150787, "learning_rate": 2e-05, "loss": 0.0083, "step": 80},
+    {"epoch": 0.82, "grad_norm": 0.024613451212644577, "learning_rate": 2e-05, "loss": 0.0116, "step": 82},
+    {"epoch": 0.84, "grad_norm": 0.009135313332080841, "learning_rate": 2e-05, "loss": 0.0008, "step": 84},
+    {"epoch": 0.86, "grad_norm": 0.2523867189884186, "learning_rate": 2e-05, "loss": 0.4089, "step": 86},
+    {"epoch": 0.88, "grad_norm": 0.32133349776268005, "learning_rate": 2e-05, "loss": 0.0093, "step": 88},
+    {"epoch": 0.9, "grad_norm": 0.09947264194488525, "learning_rate": 2e-05, "loss": 0.0039, "step": 90},
+    {"epoch": 0.92, "grad_norm": 0.12009107321500778, "learning_rate": 2e-05, "loss": 0.0069, "step": 92},
+    {"epoch": 0.94, "grad_norm": 0.007163307163864374, "learning_rate": 2e-05, "loss": 0.0081, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.4114590883255005, "learning_rate": 2e-05, "loss": 0.066, "step": 96},
+    {"epoch": 0.98, "grad_norm": 0.04830743372440338, "learning_rate": 2e-05, "loss": 0.0046, "step": 98},
+    {"epoch": 1.0, "grad_norm": 0.04225878044962883, "learning_rate": 2e-05, "loss": 0.0019, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 2743212791824384.0, "train_loss": 0.0904453244805336, "train_runtime": 221.9331, "train_samples_per_second": 1.802, "train_steps_per_second": 0.451}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 2743212791824384.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
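Because every client writes the same state layout, the final summary entries can be compared across clients. A minimal sketch, assuming local copies of the trainer states listed above (clients 0-4):

import json

for cid in range(5):
    with open(f"{cid}_trainer_state.json") as f:
        summary = json.load(f)["log_history"][-1]
    print(cid, summary["train_loss"], summary["train_runtime"])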
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9be8024aa2eece99882c85b8046475baa83ad6283113525243ad4265e9bf7576
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce08ca256ac2358ea58f9bf1b6e1e689255df91a4f52dbeb33536a4dcc474533
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c586d85bfc4ed525de5cc0bcd8e9bb1def55fc41b70014035e84fbbdfc08625b
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d608fe1ce0b07b208d805224fc9aa2685c3edeca580c162648006f6425393528
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2072f36c89ed2416f35894c760f95c059d0947e40d2c851e6559e50ac1c610dd
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99426839dd4d5f67e775bb10a4c52b99e22e14e2de2c8b8d162e9059a49513cc
+size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8586e15be816067f89397fd74efde646b7f477c27e24148b917e3bfa10b0d3e
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd9c04cdc272cfff8bddd92cc280c2385a2797d1a2f3a196861109207940d86
+size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/2_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.02, "grad_norm": 1.3100306987762451, "learning_rate": 2e-05, "loss": 0.3864, "step": 2},
+    {"epoch": 0.04, "grad_norm": 4.013766288757324, "learning_rate": 2e-05, "loss": 0.9063, "step": 4},
+    {"epoch": 0.06, "grad_norm": 2.3978607654571533, "learning_rate": 2e-05, "loss": 1.1349, "step": 6},
+    {"epoch": 0.08, "grad_norm": 4.109842300415039, "learning_rate": 2e-05, "loss": 0.825, "step": 8},
+    {"epoch": 0.1, "grad_norm": 2.427557945251465, "learning_rate": 2e-05, "loss": 0.5722, "step": 10},
+    {"epoch": 0.12, "grad_norm": 4.734681606292725, "learning_rate": 2e-05, "loss": 1.0887, "step": 12},
+    {"epoch": 0.14, "grad_norm": 0.10447093099355698, "learning_rate": 2e-05, "loss": 0.5304, "step": 14},
+    {"epoch": 0.16, "grad_norm": 2.675856828689575, "learning_rate": 2e-05, "loss": 0.9177, "step": 16},
+    {"epoch": 0.18, "grad_norm": 1.6946303844451904, "learning_rate": 2e-05, "loss": 0.4756, "step": 18},
+    {"epoch": 0.2, "grad_norm": 1.9363477230072021, "learning_rate": 2e-05, "loss": 0.7104, "step": 20},
+    {"epoch": 0.22, "grad_norm": 0.7683027982711792, "learning_rate": 2e-05, "loss": 0.1338, "step": 22},
+    {"epoch": 0.24, "grad_norm": 5.035861492156982, "learning_rate": 2e-05, "loss": 0.9689, "step": 24},
+    {"epoch": 0.26, "grad_norm": 3.8543343544006348, "learning_rate": 2e-05, "loss": 0.523, "step": 26},
+    {"epoch": 0.28, "grad_norm": 3.667877435684204, "learning_rate": 2e-05, "loss": 0.3602, "step": 28},
+    {"epoch": 0.3, "grad_norm": 3.0984058380126953, "learning_rate": 2e-05, "loss": 0.5042, "step": 30},
+    {"epoch": 0.32, "grad_norm": 1.3304017782211304, "learning_rate": 2e-05, "loss": 0.2943, "step": 32},
+    {"epoch": 0.34, "grad_norm": 2.7662222385406494, "learning_rate": 2e-05, "loss": 0.5895, "step": 34},
+    {"epoch": 0.36, "grad_norm": 0.6082299947738647, "learning_rate": 2e-05, "loss": 0.1146, "step": 36},
+    {"epoch": 0.38, "grad_norm": 2.9511778354644775, "learning_rate": 2e-05, "loss": 0.7031, "step": 38},
+    {"epoch": 0.4, "grad_norm": 3.04605770111084, "learning_rate": 2e-05, "loss": 0.3958, "step": 40},
+    {"epoch": 0.42, "grad_norm": 3.8611412048339844, "learning_rate": 2e-05, "loss": 0.8662, "step": 42},
+    {"epoch": 0.44, "grad_norm": 4.399799823760986, "learning_rate": 2e-05, "loss": 0.287, "step": 44},
+    {"epoch": 0.46, "grad_norm": 2.2531864643096924, "learning_rate": 2e-05, "loss": 0.3218, "step": 46},
+    {"epoch": 0.48, "grad_norm": 1.4663535356521606, "learning_rate": 2e-05, "loss": 1.0107, "step": 48},
+    {"epoch": 0.5, "grad_norm": 1.7521519660949707, "learning_rate": 2e-05, "loss": 0.124, "step": 50},
+    {"epoch": 0.52, "grad_norm": 8.896435737609863, "learning_rate": 2e-05, "loss": 0.8135, "step": 52},
+    {"epoch": 0.54, "grad_norm": 2.5376131534576416, "learning_rate": 2e-05, "loss": 0.511, "step": 54},
+    {"epoch": 0.56, "grad_norm": 6.814590930938721, "learning_rate": 2e-05, "loss": 1.0585, "step": 56},
+    {"epoch": 0.58, "grad_norm": 2.1924495697021484, "learning_rate": 2e-05, "loss": 0.1282, "step": 58},
+    {"epoch": 0.6, "grad_norm": 0.17645888030529022, "learning_rate": 2e-05, "loss": 0.0467, "step": 60},
+    {"epoch": 0.62, "grad_norm": 2.022798538208008, "learning_rate": 2e-05, "loss": 0.7755, "step": 62},
+    {"epoch": 0.64, "grad_norm": 3.513547897338867, "learning_rate": 2e-05, "loss": 1.0105, "step": 64},
+    {"epoch": 0.66, "grad_norm": 2.7813096046447754, "learning_rate": 2e-05, "loss": 0.3313, "step": 66},
+    {"epoch": 0.68, "grad_norm": 14.315655708312988, "learning_rate": 2e-05, "loss": 1.5528, "step": 68},
+    {"epoch": 0.7, "grad_norm": 3.795628309249878, "learning_rate": 2e-05, "loss": 0.5101, "step": 70},
+    {"epoch": 0.72, "grad_norm": 1.2288278341293335, "learning_rate": 2e-05, "loss": 1.374, "step": 72},
+    {"epoch": 0.74, "grad_norm": 4.386161804199219, "learning_rate": 2e-05, "loss": 0.1973, "step": 74},
+    {"epoch": 0.76, "grad_norm": 3.3581271171569824, "learning_rate": 2e-05, "loss": 0.6263, "step": 76},
+    {"epoch": 0.78, "grad_norm": 1.225866436958313, "learning_rate": 2e-05, "loss": 0.1825, "step": 78},
+    {"epoch": 0.8, "grad_norm": 7.104559898376465, "learning_rate": 2e-05, "loss": 1.1801, "step": 80},
+    {"epoch": 0.82, "grad_norm": 13.105498313903809, "learning_rate": 2e-05, "loss": 1.7541, "step": 82},
+    {"epoch": 0.84, "grad_norm": 0.2902798056602478, "learning_rate": 2e-05, "loss": 0.0271, "step": 84},
+    {"epoch": 0.86, "grad_norm": 4.769597053527832, "learning_rate": 2e-05, "loss": 0.5574, "step": 86},
+    {"epoch": 0.88, "grad_norm": 3.8983893394470215, "learning_rate": 2e-05, "loss": 1.3119, "step": 88},
+    {"epoch": 0.9, "grad_norm": 3.6488382816314697, "learning_rate": 2e-05, "loss": 1.0188, "step": 90},
+    {"epoch": 0.92, "grad_norm": 0.29925721883773804, "learning_rate": 2e-05, "loss": 0.5366, "step": 92},
+    {"epoch": 0.94, "grad_norm": 3.2376351356506348, "learning_rate": 2e-05, "loss": 0.6949, "step": 94},
+    {"epoch": 0.96, "grad_norm": 0.63726806640625, "learning_rate": 2e-05, "loss": 0.2721, "step": 96},
+    {"epoch": 0.98, "grad_norm": 5.418401718139648, "learning_rate": 2e-05, "loss": 1.4491, "step": 98},
+    {"epoch": 1.0, "grad_norm": 2.0275309085845947, "learning_rate": 2e-05, "loss": 0.5931, "step": 100},
+    {"epoch": 1.0, "step": 100, "total_flos": 5059805727162368.0, "train_loss": 0.6651638150215149, "train_runtime": 277.116, "train_samples_per_second": 1.443, "train_steps_per_second": 0.361}
+  ],
+  "logging_steps": 2,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {"TrainerControl": {"args": {"should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false}, "attributes": {}}},
+  "total_flos": 5059805727162368.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
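To mirror the whole folder locally instead of fetching files one at a time, snapshot_download can be restricted to this directory. A minimal sketch; repo_id is a placeholder:

from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="your-username/your-repo",  # placeholder
    allow_patterns=["client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/*"],
)
print(local_dir)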
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57fec58061a92ab1b1bf4a4c6012fc1397a46c90b77813f7cdd58f7cb3fe2ed3
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3401ff987c93089fb123edc132faa3c3adae7422977093002adb8f8d9588d91
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac62e5e6fe65b335575f7ed7e9b1e3e428debec1fca2e10d9dc2123665b121b
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6901f0e6d410fbd830c0624606aa6c27c103266b25c9e6b95ab4a281cc6bfc6
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaaf2162f9a52ec84d0a5306af230ee2e2524d8b3ab4f9007c0eb79f88a34f95
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa5e605e3d74782eebb64e65d6103a2c51abb8a7defe4b8c0cf0e4b3c764165a
+size 302202238
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2765067dfd32667733c18170c3f622d661e26844689a324651bb49e31c0531eb
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a34222b6b3c027b0a0631a8b22b43c25f1371bfd82e5daf1543cf536e3c8723
+size 302201386
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/3_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "grad_norm": 4.537669658660889, "learning_rate": 2e-05, "loss": 1.832, "step": 2 },
    { "epoch": 0.04, "grad_norm": 1.7357145547866821, "learning_rate": 2e-05, "loss": 0.6956, "step": 4 },
    { "epoch": 0.06, "grad_norm": 4.840099334716797, "learning_rate": 2e-05, "loss": 1.1172, "step": 6 },
    { "epoch": 0.08, "grad_norm": 6.74650239944458, "learning_rate": 2e-05, "loss": 1.2902, "step": 8 },
    { "epoch": 0.1, "grad_norm": 5.235438346862793, "learning_rate": 2e-05, "loss": 0.8775, "step": 10 },
    { "epoch": 0.12, "grad_norm": 7.184224605560303, "learning_rate": 2e-05, "loss": 2.0581, "step": 12 },
    { "epoch": 0.14, "grad_norm": 12.11132526397705, "learning_rate": 2e-05, "loss": 2.1586, "step": 14 },
    { "epoch": 0.16, "grad_norm": 5.101541042327881, "learning_rate": 2e-05, "loss": 1.0109, "step": 16 },
    { "epoch": 0.18, "grad_norm": 4.515460014343262, "learning_rate": 2e-05, "loss": 0.7775, "step": 18 },
    { "epoch": 0.2, "grad_norm": 1.4222784042358398, "learning_rate": 2e-05, "loss": 0.935, "step": 20 },
    { "epoch": 0.22, "grad_norm": 4.021170139312744, "learning_rate": 2e-05, "loss": 0.8757, "step": 22 },
    { "epoch": 0.24, "grad_norm": 2.8799550533294678, "learning_rate": 2e-05, "loss": 0.809, "step": 24 },
    { "epoch": 0.26, "grad_norm": 0.9323534369468689, "learning_rate": 2e-05, "loss": 0.4597, "step": 26 },
    { "epoch": 0.28, "grad_norm": 2.9174280166625977, "learning_rate": 2e-05, "loss": 0.3155, "step": 28 },
    { "epoch": 0.3, "grad_norm": 3.026035785675049, "learning_rate": 2e-05, "loss": 0.6032, "step": 30 },
    { "epoch": 0.32, "grad_norm": 8.995691299438477, "learning_rate": 2e-05, "loss": 0.9903, "step": 32 },
    { "epoch": 0.34, "grad_norm": 1.847169041633606, "learning_rate": 2e-05, "loss": 1.3635, "step": 34 },
    { "epoch": 0.36, "grad_norm": 9.795331001281738, "learning_rate": 2e-05, "loss": 2.0643, "step": 36 },
    { "epoch": 0.38, "grad_norm": 1.7796157598495483, "learning_rate": 2e-05, "loss": 0.4105, "step": 38 },
    { "epoch": 0.4, "grad_norm": 4.427263259887695, "learning_rate": 2e-05, "loss": 0.8952, "step": 40 },
    { "epoch": 0.42, "grad_norm": 15.724753379821777, "learning_rate": 2e-05, "loss": 2.4695, "step": 42 },
    { "epoch": 0.44, "grad_norm": 6.148226261138916, "learning_rate": 2e-05, "loss": 1.2556, "step": 44 },
    { "epoch": 0.46, "grad_norm": 1.515079140663147, "learning_rate": 2e-05, "loss": 0.4941, "step": 46 },
    { "epoch": 0.48, "grad_norm": 1.410451889038086, "learning_rate": 2e-05, "loss": 0.2573, "step": 48 },
    { "epoch": 0.5, "grad_norm": 4.407503604888916, "learning_rate": 2e-05, "loss": 0.8896, "step": 50 },
    { "epoch": 0.52, "grad_norm": 8.208944320678711, "learning_rate": 2e-05, "loss": 2.3883, "step": 52 },
    { "epoch": 0.54, "grad_norm": 1.5512627363204956, "learning_rate": 2e-05, "loss": 0.6002, "step": 54 },
    { "epoch": 0.56, "grad_norm": 5.655065059661865, "learning_rate": 2e-05, "loss": 0.9759, "step": 56 },
    { "epoch": 0.58, "grad_norm": 2.2757630348205566, "learning_rate": 2e-05, "loss": 0.481, "step": 58 },
    { "epoch": 0.6, "grad_norm": 9.758919715881348, "learning_rate": 2e-05, "loss": 1.1768, "step": 60 },
    { "epoch": 0.62, "grad_norm": 4.106739521026611, "learning_rate": 2e-05, "loss": 0.7397, "step": 62 },
    { "epoch": 0.64, "grad_norm": 4.5539231300354, "learning_rate": 2e-05, "loss": 0.7899, "step": 64 },
    { "epoch": 0.66, "grad_norm": 3.6534392833709717, "learning_rate": 2e-05, "loss": 0.6942, "step": 66 },
    { "epoch": 0.68, "grad_norm": 4.095523357391357, "learning_rate": 2e-05, "loss": 1.1672, "step": 68 },
    { "epoch": 0.7, "grad_norm": 6.486148834228516, "learning_rate": 2e-05, "loss": 1.2393, "step": 70 },
    { "epoch": 0.72, "grad_norm": 8.842077255249023, "learning_rate": 2e-05, "loss": 1.7434, "step": 72 },
    { "epoch": 0.74, "grad_norm": 8.89773178100586, "learning_rate": 2e-05, "loss": 1.2772, "step": 74 },
    { "epoch": 0.76, "grad_norm": 18.22632598876953, "learning_rate": 2e-05, "loss": 3.3775, "step": 76 },
    { "epoch": 0.78, "grad_norm": 2.2898244857788086, "learning_rate": 2e-05, "loss": 0.7762, "step": 78 },
    { "epoch": 0.8, "grad_norm": 7.303566932678223, "learning_rate": 2e-05, "loss": 0.6705, "step": 80 },
    { "epoch": 0.82, "grad_norm": 4.615819454193115, "learning_rate": 2e-05, "loss": 0.4729, "step": 82 },
    { "epoch": 0.84, "grad_norm": 3.424020290374756, "learning_rate": 2e-05, "loss": 0.6039, "step": 84 },
    { "epoch": 0.86, "grad_norm": 2.209282636642456, "learning_rate": 2e-05, "loss": 2.1192, "step": 86 },
    { "epoch": 0.88, "grad_norm": 1.9839006662368774, "learning_rate": 2e-05, "loss": 0.9141, "step": 88 },
    { "epoch": 0.9, "grad_norm": 1.8658138513565063, "learning_rate": 2e-05, "loss": 0.9538, "step": 90 },
    { "epoch": 0.92, "grad_norm": 5.851518630981445, "learning_rate": 2e-05, "loss": 0.7171, "step": 92 },
    { "epoch": 0.94, "grad_norm": 3.3957180976867676, "learning_rate": 2e-05, "loss": 0.8128, "step": 94 },
    { "epoch": 0.96, "grad_norm": 1.1212002038955688, "learning_rate": 2e-05, "loss": 0.3877, "step": 96 },
    { "epoch": 0.98, "grad_norm": 3.886902093887329, "learning_rate": 2e-05, "loss": 0.8257, "step": 98 },
    { "epoch": 1.0, "grad_norm": 3.7381937503814697, "learning_rate": 2e-05, "loss": 0.7051, "step": 100 },
    { "epoch": 1.0, "step": 100, "total_flos": 2779357248487424.0, "train_loss": 1.0703019142150878, "train_runtime": 215.987, "train_samples_per_second": 1.852, "train_steps_per_second": 0.463 }
  ],
  "logging_steps": 2,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2779357248487424.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
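The trainer-state files in this upload follow the standard Hugging Face `Trainer` checkpoint layout, so the per-step loss curve and the end-of-run summary can be read back with the `json` module alone. A minimal sketch, assuming a local copy of the folder (the specific filename below is just one of the `*_trainer_state.json` files listed in this upload):

```python
import json

# Path assumes the folder has been downloaded locally; any *_trainer_state.json in it has the same layout.
path = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/0_trainer_state.json"
)

with open(path) as f:
    state = json.load(f)

# Per-logging-step records (every `logging_steps` = 2 optimizer steps) carry a "loss" key;
# the final record instead carries the aggregate train_* statistics.
records = [r for r in state["log_history"] if "loss" in r]
steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
print(f"{len(records)} logged points up to step {state['global_step']}")

summary = state["log_history"][-1]
print("train_loss:", summary["train_loss"], "runtime (s):", summary["train_runtime"])
```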
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69d4760f0c1959606186ed7e2dbc53f991bd01e88e3a73b20618c9bcbd214386
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4447cf5a3e3ba4fc20c85906454383543302b54a45eb6230e015b355a7c620c
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5c4e788867c89e82a865f5c263bfc27372f9ad1c131ee5f73ba1257cb1358df0
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4132f75da66ae6d2d1d4e9d126dcf9907beda51bc8637a5d299f80f47e1dc1b
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83ae2b5822fb41da2f51fa255253ef5fe5e0742b6941f2e9d52243a28d00e24f
size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round20.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea8bb07c77a2cfd707f77600756cadf906298b5c948248212f3d5d1787fded51
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round5.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4d08d3691c5ba154f8ed2e0f222d7e52e43fe806519ae3751d1bec29a8d4105
size 487635186
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_client_model_round7.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e3655617efa0261b43cf077e8d338e5d7409043547ee972bf9bb851282182c3
size 487635186
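The `*_client_model_round*.pth` entries above and below are Git LFS pointer files; the actual ~487 MB client checkpoints live in LFS storage. A minimal sketch of fetching and inspecting one of them, assuming this upload sits in a Hugging Face repo (the repo id and repo type below are placeholders, and `torch.load` is assumed to apply only because the files use the `.pth` extension):

```python
import torch
from huggingface_hub import hf_hub_download

REPO_ID = "your-org/your-repo"  # placeholder: the repo this upload belongs to
FOLDER = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099"
)

# Resolves the LFS pointer and downloads the real checkpoint into the local cache.
local_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=f"{FOLDER}/4_client_model_round10.pth",
    repo_type="model",  # assumption: change to "dataset" if the upload is a dataset repo
)

# Load on CPU and peek at the stored object without assuming its exact structure.
obj = torch.load(local_path, map_location="cpu")
if isinstance(obj, dict):
    for name, value in list(obj.items())[:10]:
        shape = tuple(value.shape) if hasattr(value, "shape") else type(value).__name__
        print(name, shape)
```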
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/4_trainer_state.json
ADDED
@@ -0,0 +1,392 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "grad_norm": 3.9793167114257812, "learning_rate": 2e-05, "loss": 0.9603, "step": 2 },
    { "epoch": 0.04, "grad_norm": 2.5188374519348145, "learning_rate": 2e-05, "loss": 0.8116, "step": 4 },
    { "epoch": 0.06, "grad_norm": 1.3143935203552246, "learning_rate": 2e-05, "loss": 0.6426, "step": 6 },
    { "epoch": 0.08, "grad_norm": 1.7295637130737305, "learning_rate": 2e-05, "loss": 0.7914, "step": 8 },
    { "epoch": 0.1, "grad_norm": 0.7666940689086914, "learning_rate": 2e-05, "loss": 0.5176, "step": 10 },
    { "epoch": 0.12, "grad_norm": 1.7987310886383057, "learning_rate": 2e-05, "loss": 0.2534, "step": 12 },
    { "epoch": 0.14, "grad_norm": 2.515049695968628, "learning_rate": 2e-05, "loss": 0.5228, "step": 14 },
    { "epoch": 0.16, "grad_norm": 4.360015869140625, "learning_rate": 2e-05, "loss": 1.2806, "step": 16 },
    { "epoch": 0.18, "grad_norm": 1.8536200523376465, "learning_rate": 2e-05, "loss": 0.4718, "step": 18 },
    { "epoch": 0.2, "grad_norm": 2.595823287963867, "learning_rate": 2e-05, "loss": 0.5098, "step": 20 },
    { "epoch": 0.22, "grad_norm": 3.5519330501556396, "learning_rate": 2e-05, "loss": 1.5173, "step": 22 },
    { "epoch": 0.24, "grad_norm": 1.3497133255004883, "learning_rate": 2e-05, "loss": 0.9349, "step": 24 },
    { "epoch": 0.26, "grad_norm": 1.930624008178711, "learning_rate": 2e-05, "loss": 0.4916, "step": 26 },
    { "epoch": 0.28, "grad_norm": 3.4734158515930176, "learning_rate": 2e-05, "loss": 1.0893, "step": 28 },
    { "epoch": 0.3, "grad_norm": 2.4782814979553223, "learning_rate": 2e-05, "loss": 0.8819, "step": 30 },
    { "epoch": 0.32, "grad_norm": 3.521637439727783, "learning_rate": 2e-05, "loss": 0.9765, "step": 32 },
    { "epoch": 0.34, "grad_norm": 1.3706670999526978, "learning_rate": 2e-05, "loss": 0.4721, "step": 34 },
    { "epoch": 0.36, "grad_norm": 5.157536506652832, "learning_rate": 2e-05, "loss": 0.672, "step": 36 },
    { "epoch": 0.38, "grad_norm": 1.5482473373413086, "learning_rate": 2e-05, "loss": 0.3766, "step": 38 },
    { "epoch": 0.4, "grad_norm": 5.12284517288208, "learning_rate": 2e-05, "loss": 1.2043, "step": 40 },
    { "epoch": 0.42, "grad_norm": 2.4150655269622803, "learning_rate": 2e-05, "loss": 0.9744, "step": 42 },
    { "epoch": 0.44, "grad_norm": 2.1841928958892822, "learning_rate": 2e-05, "loss": 0.5246, "step": 44 },
    { "epoch": 0.46, "grad_norm": 5.618422985076904, "learning_rate": 2e-05, "loss": 1.2388, "step": 46 },
    { "epoch": 0.48, "grad_norm": 2.524454116821289, "learning_rate": 2e-05, "loss": 0.5944, "step": 48 },
    { "epoch": 0.5, "grad_norm": 1.7722740173339844, "learning_rate": 2e-05, "loss": 0.4012, "step": 50 },
    { "epoch": 0.52, "grad_norm": 4.978190898895264, "learning_rate": 2e-05, "loss": 1.5566, "step": 52 },
    { "epoch": 0.54, "grad_norm": 3.1553709506988525, "learning_rate": 2e-05, "loss": 0.945, "step": 54 },
    { "epoch": 0.56, "grad_norm": 0.8509775400161743, "learning_rate": 2e-05, "loss": 0.1817, "step": 56 },
    { "epoch": 0.58, "grad_norm": 3.637312173843384, "learning_rate": 2e-05, "loss": 0.7817, "step": 58 },
    { "epoch": 0.6, "grad_norm": 5.024761199951172, "learning_rate": 2e-05, "loss": 1.0443, "step": 60 },
    { "epoch": 0.62, "grad_norm": 1.6964753866195679, "learning_rate": 2e-05, "loss": 1.0813, "step": 62 },
    { "epoch": 0.64, "grad_norm": 2.6202216148376465, "learning_rate": 2e-05, "loss": 0.8394, "step": 64 },
    { "epoch": 0.66, "grad_norm": 2.0826215744018555, "learning_rate": 2e-05, "loss": 0.6085, "step": 66 },
    { "epoch": 0.68, "grad_norm": 1.6744186878204346, "learning_rate": 2e-05, "loss": 0.5914, "step": 68 },
    { "epoch": 0.7, "grad_norm": 3.6541945934295654, "learning_rate": 2e-05, "loss": 0.8268, "step": 70 },
    { "epoch": 0.72, "grad_norm": 2.257369041442871, "learning_rate": 2e-05, "loss": 0.5257, "step": 72 },
    { "epoch": 0.74, "grad_norm": 0.468746542930603, "learning_rate": 2e-05, "loss": 0.5201, "step": 74 },
    { "epoch": 0.76, "grad_norm": 2.3300929069519043, "learning_rate": 2e-05, "loss": 1.035, "step": 76 },
    { "epoch": 0.78, "grad_norm": 3.127511739730835, "learning_rate": 2e-05, "loss": 0.8523, "step": 78 },
    { "epoch": 0.8, "grad_norm": 2.3533546924591064, "learning_rate": 2e-05, "loss": 0.3409, "step": 80 },
    { "epoch": 0.82, "grad_norm": 2.2223410606384277, "learning_rate": 2e-05, "loss": 0.5418, "step": 82 },
    { "epoch": 0.84, "grad_norm": 2.9352030754089355, "learning_rate": 2e-05, "loss": 0.7653, "step": 84 },
    { "epoch": 0.86, "grad_norm": 0.8046193718910217, "learning_rate": 2e-05, "loss": 0.3562, "step": 86 },
    { "epoch": 0.88, "grad_norm": 1.5213556289672852, "learning_rate": 2e-05, "loss": 0.484, "step": 88 },
    { "epoch": 0.9, "grad_norm": 2.6822187900543213, "learning_rate": 2e-05, "loss": 0.8862, "step": 90 },
    { "epoch": 0.92, "grad_norm": 5.5709662437438965, "learning_rate": 2e-05, "loss": 0.9033, "step": 92 },
    { "epoch": 0.94, "grad_norm": 4.302905082702637, "learning_rate": 2e-05, "loss": 1.0037, "step": 94 },
    { "epoch": 0.96, "grad_norm": 2.346940279006958, "learning_rate": 2e-05, "loss": 0.4615, "step": 96 },
    { "epoch": 0.98, "grad_norm": 1.4154908657073975, "learning_rate": 2e-05, "loss": 0.2501, "step": 98 },
    { "epoch": 1.0, "grad_norm": 7.068972587585449, "learning_rate": 2e-05, "loss": 1.4809, "step": 100 },
    { "epoch": 1.0, "step": 100, "total_flos": 5838906972962816.0, "train_loss": 0.7595100402832031, "train_runtime": 275.7767, "train_samples_per_second": 1.45, "train_steps_per_second": 0.363 }
  ],
  "logging_steps": 2,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5838906972962816.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
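Because each client writes the same trainer-state layout, the end-of-run statistics (the last `log_history` record plus `total_flos`) can be compared across clients in a few lines; a sketch under the same local-copy assumption as above:

```python
import glob
import json

FOLDER = (
    "client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5"
    "_sc1316_4tasks_5rounds_fixitr100_T0125_decay099"
)

for path in sorted(glob.glob(f"{FOLDER}/*_trainer_state.json")):
    with open(path) as f:
        state = json.load(f)
    summary = state["log_history"][-1]  # final record holds the aggregate training stats
    print(
        f"{path}: train_loss={summary['train_loss']:.4f} "
        f"runtime={summary['train_runtime']:.1f}s flos={state['total_flos']:.3e}"
    )
```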
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round10.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7061a373da978d284035594b21fa90359b7f63246265529bf3bbd4424d47ab77
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round12.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea9f5a3d5256c4d387533a530ca53a00edf9a3dd5b539d359ef244972f191740
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round15.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0d2247c87168cd25007195226e5a2d0cc8da9ff1ad46d1e494b8a8a64bc353b
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round17.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd9f2bd53bd05c8e603b94af226a34333ecaf4dc93ab7e920e7ef6ffed5709e6
size 487636262
client_states_fedMultipqfullfreeze_sft_pca_bs4_saveoptim_lr2e-5_sc1316_4tasks_5rounds_fixitr100_T0125_decay099/5_client_model_round2.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa4f85fb161c9eb53205e83989ae34fdafca370bf153f995e1e6f4ac63320fdf
size 487635186