thkim0305
/

feddat_baselines

Model card Files Files and versions Community

thkim0305 commited on 6 days ago

Commit

ca33d77

verified ·

1 Parent(s): 0703b3a

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json +378 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json +378 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json +378 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json +378 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json +378 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth +3 -0
client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth +3 -0

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35db35281882d1f226fb057ce1dc1d10268ce76953bee3787606804b39d316ef
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4320beb5e2c354434681ed2d3b2dcdd78d536aedc3fba5f9f1e06943ec2e7c9
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8028b19b5f86ee47adabeb517b0cb8e6f55250d34905080c06d2df87bc90e326
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:399ebbb40d73ecda84215e6074344b500ffade76f87bfea8a0d6d62346a108d5
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a73b669622db0d158ac1056f7ed6b435324a3c4b5f24e9a3bde9565770f36e5
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4562188f7b74a13d10b41b89b9eddab396cfb79214a4849a8637db03c488f361
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c21d1d0f631ea5682fb5747b169d1f49dea3a6af343afe5d51396cda5580892
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd22f5b455184254d90300e38de573ee5573bc8ffd8cb32135fe7f168557e7b
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 97,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 4.899675369262695,
+      "learning_rate": 2e-05,
+      "loss": 0.4572,
+      "step": 2
+    },
+    {
+      "epoch": 0.041237113402061855,
+      "grad_norm": 0.364077627658844,
+      "learning_rate": 2e-05,
+      "loss": 0.1723,
+      "step": 4
+    },
+    {
+      "epoch": 0.061855670103092786,
+      "grad_norm": 1.9865665435791016,
+      "learning_rate": 2e-05,
+      "loss": 0.6005,
+      "step": 6
+    },
+    {
+      "epoch": 0.08247422680412371,
+      "grad_norm": 4.352550029754639,
+      "learning_rate": 2e-05,
+      "loss": 1.0405,
+      "step": 8
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 1.1260665655136108,
+      "learning_rate": 2e-05,
+      "loss": 0.2097,
+      "step": 10
+    },
+    {
+      "epoch": 0.12371134020618557,
+      "grad_norm": 4.107581615447998,
+      "learning_rate": 2e-05,
+      "loss": 1.3789,
+      "step": 12
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 0.47090861201286316,
+      "learning_rate": 2e-05,
+      "loss": 0.381,
+      "step": 14
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 0.8781032562255859,
+      "learning_rate": 2e-05,
+      "loss": 0.1236,
+      "step": 16
+    },
+    {
+      "epoch": 0.18556701030927836,
+      "grad_norm": 8.501681327819824,
+      "learning_rate": 2e-05,
+      "loss": 1.1681,
+      "step": 18
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 1.3251969814300537,
+      "learning_rate": 2e-05,
+      "loss": 0.6099,
+      "step": 20
+    },
+    {
+      "epoch": 0.2268041237113402,
+      "grad_norm": 3.748138189315796,
+      "learning_rate": 2e-05,
+      "loss": 1.0833,
+      "step": 22
+    },
+    {
+      "epoch": 0.24742268041237114,
+      "grad_norm": 0.729949414730072,
+      "learning_rate": 2e-05,
+      "loss": 0.1654,
+      "step": 24
+    },
+    {
+      "epoch": 0.26804123711340205,
+      "grad_norm": 1.2475837469100952,
+      "learning_rate": 2e-05,
+      "loss": 0.4468,
+      "step": 26
+    },
+    {
+      "epoch": 0.28865979381443296,
+      "grad_norm": 0.3473127484321594,
+      "learning_rate": 2e-05,
+      "loss": 0.2435,
+      "step": 28
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 4.668135166168213,
+      "learning_rate": 2e-05,
+      "loss": 1.338,
+      "step": 30
+    },
+    {
+      "epoch": 0.32989690721649484,
+      "grad_norm": 2.5174388885498047,
+      "learning_rate": 2e-05,
+      "loss": 0.2791,
+      "step": 32
+    },
+    {
+      "epoch": 0.35051546391752575,
+      "grad_norm": 2.1460764408111572,
+      "learning_rate": 2e-05,
+      "loss": 0.4064,
+      "step": 34
+    },
+    {
+      "epoch": 0.3711340206185567,
+      "grad_norm": 3.378509998321533,
+      "learning_rate": 2e-05,
+      "loss": 0.7993,
+      "step": 36
+    },
+    {
+      "epoch": 0.3917525773195876,
+      "grad_norm": 1.6487655639648438,
+      "learning_rate": 2e-05,
+      "loss": 0.1094,
+      "step": 38
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 2.8206214904785156,
+      "learning_rate": 2e-05,
+      "loss": 1.4364,
+      "step": 40
+    },
+    {
+      "epoch": 0.4329896907216495,
+      "grad_norm": 2.229614496231079,
+      "learning_rate": 2e-05,
+      "loss": 0.3985,
+      "step": 42
+    },
+    {
+      "epoch": 0.4536082474226804,
+      "grad_norm": 1.4115321636199951,
+      "learning_rate": 2e-05,
+      "loss": 0.1919,
+      "step": 44
+    },
+    {
+      "epoch": 0.4742268041237113,
+      "grad_norm": 2.823225498199463,
+      "learning_rate": 2e-05,
+      "loss": 0.3599,
+      "step": 46
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 0.7314802408218384,
+      "learning_rate": 2e-05,
+      "loss": 0.9801,
+      "step": 48
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 1.0195244550704956,
+      "learning_rate": 2e-05,
+      "loss": 0.0946,
+      "step": 50
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 2.806351661682129,
+      "learning_rate": 2e-05,
+      "loss": 0.3121,
+      "step": 52
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 0.7020225524902344,
+      "learning_rate": 2e-05,
+      "loss": 1.0508,
+      "step": 54
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 1.1141129732131958,
+      "learning_rate": 2e-05,
+      "loss": 0.1508,
+      "step": 56
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 2.0665926933288574,
+      "learning_rate": 2e-05,
+      "loss": 0.1938,
+      "step": 58
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 0.09458617866039276,
+      "learning_rate": 2e-05,
+      "loss": 0.5867,
+      "step": 60
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 1.0511982440948486,
+      "learning_rate": 2e-05,
+      "loss": 2.7297,
+      "step": 62
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 8.42614459991455,
+      "learning_rate": 2e-05,
+      "loss": 1.3084,
+      "step": 64
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 4.046963691711426,
+      "learning_rate": 2e-05,
+      "loss": 0.8881,
+      "step": 66
+    },
+    {
+      "epoch": 0.7010309278350515,
+      "grad_norm": 2.4904868602752686,
+      "learning_rate": 2e-05,
+      "loss": 0.3384,
+      "step": 68
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 1.1527413129806519,
+      "learning_rate": 2e-05,
+      "loss": 0.3475,
+      "step": 70
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 1.3058439493179321,
+      "learning_rate": 2e-05,
+      "loss": 0.5549,
+      "step": 72
+    },
+    {
+      "epoch": 0.7628865979381443,
+      "grad_norm": 0.345022976398468,
+      "learning_rate": 2e-05,
+      "loss": 0.869,
+      "step": 74
+    },
+    {
+      "epoch": 0.7835051546391752,
+      "grad_norm": 0.7008552551269531,
+      "learning_rate": 2e-05,
+      "loss": 0.2608,
+      "step": 76
+    },
+    {
+      "epoch": 0.8041237113402062,
+      "grad_norm": 3.1025912761688232,
+      "learning_rate": 2e-05,
+      "loss": 0.6226,
+      "step": 78
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 3.710395336151123,
+      "learning_rate": 2e-05,
+      "loss": 0.9768,
+      "step": 80
+    },
+    {
+      "epoch": 0.845360824742268,
+      "grad_norm": 0.3900620937347412,
+      "learning_rate": 2e-05,
+      "loss": 0.1941,
+      "step": 82
+    },
+    {
+      "epoch": 0.865979381443299,
+      "grad_norm": 0.23837120831012726,
+      "learning_rate": 2e-05,
+      "loss": 0.2005,
+      "step": 84
+    },
+    {
+      "epoch": 0.8865979381443299,
+      "grad_norm": 5.000380516052246,
+      "learning_rate": 2e-05,
+      "loss": 0.593,
+      "step": 86
+    },
+    {
+      "epoch": 0.9072164948453608,
+      "grad_norm": 2.397953748703003,
+      "learning_rate": 2e-05,
+      "loss": 1.7147,
+      "step": 88
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 0.20643925666809082,
+      "learning_rate": 2e-05,
+      "loss": 0.0407,
+      "step": 90
+    },
+    {
+      "epoch": 0.9484536082474226,
+      "grad_norm": 3.6777753829956055,
+      "learning_rate": 2e-05,
+      "loss": 0.9327,
+      "step": 92
+    },
+    {
+      "epoch": 0.9690721649484536,
+      "grad_norm": 1.6937133073806763,
+      "learning_rate": 2e-05,
+      "loss": 0.1758,
+      "step": 94
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "grad_norm": 1.2445141077041626,
+      "learning_rate": 2e-05,
+      "loss": 1.7757,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "step": 97,
+      "total_flos": 2126123943067648.0,
+      "train_loss": 0.6465311345365858,
+      "train_runtime": 271.5469,
+      "train_samples_per_second": 1.429,
+      "train_steps_per_second": 0.357
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 97,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2126123943067648.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c8dec18e9ef0ae448deccaea823d6e0a2cca1306d431c07c7038536434f2756
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:004ea8c34b31af667eb1fb2bdd883169fb9ff673b0cc75384faaf1f7c3cb71bb
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec5da721e1b6c599c6c777932e40e1686f2e690d65f61fafe38ca32a3ce82f4c
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bed02013c2ad5dc7d708c4242ec2ee481920b2c93c7dc9031ed9e9511ea59c47
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdb1c926f6643fd1fd6146b0365d0a68c6ec0ac9c150cc3d0e260b305cb7db58
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e0131f3ad1b687e7f496607cd4e14c1663182a9d79a2923b60a4aa694467d4f
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fc25cbb650a8a7622723b96c175ad75da07b0f5f4eceaf959431699826cf152
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b064918db2915e992ca655eb974f163135c2d01f6c4cb93d91da3ddb56a3189
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 97,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 0.0385405458509922,
+      "learning_rate": 2e-05,
+      "loss": 0.0186,
+      "step": 2
+    },
+    {
+      "epoch": 0.041237113402061855,
+      "grad_norm": 0.06613821536302567,
+      "learning_rate": 2e-05,
+      "loss": 0.0849,
+      "step": 4
+    },
+    {
+      "epoch": 0.061855670103092786,
+      "grad_norm": 0.02067434787750244,
+      "learning_rate": 2e-05,
+      "loss": 0.0054,
+      "step": 6
+    },
+    {
+      "epoch": 0.08247422680412371,
+      "grad_norm": 0.06695201992988586,
+      "learning_rate": 2e-05,
+      "loss": 0.0057,
+      "step": 8
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 0.15570124983787537,
+      "learning_rate": 2e-05,
+      "loss": 0.0129,
+      "step": 10
+    },
+    {
+      "epoch": 0.12371134020618557,
+      "grad_norm": 0.05093451589345932,
+      "learning_rate": 2e-05,
+      "loss": 0.0056,
+      "step": 12
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 0.023744968697428703,
+      "learning_rate": 2e-05,
+      "loss": 0.0124,
+      "step": 14
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 0.00312516069971025,
+      "learning_rate": 2e-05,
+      "loss": 0.0054,
+      "step": 16
+    },
+    {
+      "epoch": 0.18556701030927836,
+      "grad_norm": 0.006616171449422836,
+      "learning_rate": 2e-05,
+      "loss": 0.0262,
+      "step": 18
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 0.005298080388456583,
+      "learning_rate": 2e-05,
+      "loss": 0.0043,
+      "step": 20
+    },
+    {
+      "epoch": 0.2268041237113402,
+      "grad_norm": 0.027790764346718788,
+      "learning_rate": 2e-05,
+      "loss": 0.0025,
+      "step": 22
+    },
+    {
+      "epoch": 0.24742268041237114,
+      "grad_norm": 0.036290887743234634,
+      "learning_rate": 2e-05,
+      "loss": 0.0025,
+      "step": 24
+    },
+    {
+      "epoch": 0.26804123711340205,
+      "grad_norm": 0.002953270450234413,
+      "learning_rate": 2e-05,
+      "loss": 0.0039,
+      "step": 26
+    },
+    {
+      "epoch": 0.28865979381443296,
+      "grad_norm": 2.2551519870758057,
+      "learning_rate": 2e-05,
+      "loss": 0.2375,
+      "step": 28
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 0.18529418110847473,
+      "learning_rate": 2e-05,
+      "loss": 0.0103,
+      "step": 30
+    },
+    {
+      "epoch": 0.32989690721649484,
+      "grad_norm": 1.886093258857727,
+      "learning_rate": 2e-05,
+      "loss": 0.1782,
+      "step": 32
+    },
+    {
+      "epoch": 0.35051546391752575,
+      "grad_norm": 0.22934368252754211,
+      "learning_rate": 2e-05,
+      "loss": 0.0129,
+      "step": 34
+    },
+    {
+      "epoch": 0.3711340206185567,
+      "grad_norm": 2.155086040496826,
+      "learning_rate": 2e-05,
+      "loss": 0.2115,
+      "step": 36
+    },
+    {
+      "epoch": 0.3917525773195876,
+      "grad_norm": 0.00891521479934454,
+      "learning_rate": 2e-05,
+      "loss": 0.0413,
+      "step": 38
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 0.008472035638988018,
+      "learning_rate": 2e-05,
+      "loss": 0.0038,
+      "step": 40
+    },
+    {
+      "epoch": 0.4329896907216495,
+      "grad_norm": 0.37362828850746155,
+      "learning_rate": 2e-05,
+      "loss": 0.2332,
+      "step": 42
+    },
+    {
+      "epoch": 0.4536082474226804,
+      "grad_norm": 0.011929775588214397,
+      "learning_rate": 2e-05,
+      "loss": 0.001,
+      "step": 44
+    },
+    {
+      "epoch": 0.4742268041237113,
+      "grad_norm": 0.006870917044579983,
+      "learning_rate": 2e-05,
+      "loss": 0.0206,
+      "step": 46
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 0.016721265390515327,
+      "learning_rate": 2e-05,
+      "loss": 0.0028,
+      "step": 48
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 0.024881532415747643,
+      "learning_rate": 2e-05,
+      "loss": 0.0027,
+      "step": 50
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 0.1209266409277916,
+      "learning_rate": 2e-05,
+      "loss": 0.8265,
+      "step": 52
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 0.05023783817887306,
+      "learning_rate": 2e-05,
+      "loss": 0.1705,
+      "step": 54
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 0.026383670046925545,
+      "learning_rate": 2e-05,
+      "loss": 0.0019,
+      "step": 56
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 0.0019155156332999468,
+      "learning_rate": 2e-05,
+      "loss": 0.0006,
+      "step": 58
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 0.8070590496063232,
+      "learning_rate": 2e-05,
+      "loss": 0.3128,
+      "step": 60
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 3.526737689971924,
+      "learning_rate": 2e-05,
+      "loss": 0.0995,
+      "step": 62
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 1.8756895065307617,
+      "learning_rate": 2e-05,
+      "loss": 0.2206,
+      "step": 64
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 0.004317080602049828,
+      "learning_rate": 2e-05,
+      "loss": 0.1284,
+      "step": 66
+    },
+    {
+      "epoch": 0.7010309278350515,
+      "grad_norm": 0.28428155183792114,
+      "learning_rate": 2e-05,
+      "loss": 0.0199,
+      "step": 68
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 0.006947671063244343,
+      "learning_rate": 2e-05,
+      "loss": 0.0336,
+      "step": 70
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 0.08691083639860153,
+      "learning_rate": 2e-05,
+      "loss": 0.0093,
+      "step": 72
+    },
+    {
+      "epoch": 0.7628865979381443,
+      "grad_norm": 0.00405128812417388,
+      "learning_rate": 2e-05,
+      "loss": 0.3603,
+      "step": 74
+    },
+    {
+      "epoch": 0.7835051546391752,
+      "grad_norm": 0.00506645767018199,
+      "learning_rate": 2e-05,
+      "loss": 0.0006,
+      "step": 76
+    },
+    {
+      "epoch": 0.8041237113402062,
+      "grad_norm": 0.02798837423324585,
+      "learning_rate": 2e-05,
+      "loss": 0.0018,
+      "step": 78
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 0.011478321626782417,
+      "learning_rate": 2e-05,
+      "loss": 0.0014,
+      "step": 80
+    },
+    {
+      "epoch": 0.845360824742268,
+      "grad_norm": 0.15240181982517242,
+      "learning_rate": 2e-05,
+      "loss": 0.106,
+      "step": 82
+    },
+    {
+      "epoch": 0.865979381443299,
+      "grad_norm": 0.08322691917419434,
+      "learning_rate": 2e-05,
+      "loss": 0.0052,
+      "step": 84
+    },
+    {
+      "epoch": 0.8865979381443299,
+      "grad_norm": 6.921684265136719,
+      "learning_rate": 2e-05,
+      "loss": 0.4361,
+      "step": 86
+    },
+    {
+      "epoch": 0.9072164948453608,
+      "grad_norm": 0.00928011815994978,
+      "learning_rate": 2e-05,
+      "loss": 0.0099,
+      "step": 88
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 1.7090333700180054,
+      "learning_rate": 2e-05,
+      "loss": 0.1191,
+      "step": 90
+    },
+    {
+      "epoch": 0.9484536082474226,
+      "grad_norm": 0.10394242405891418,
+      "learning_rate": 2e-05,
+      "loss": 0.0202,
+      "step": 92
+    },
+    {
+      "epoch": 0.9690721649484536,
+      "grad_norm": 0.04304055869579315,
+      "learning_rate": 2e-05,
+      "loss": 0.0018,
+      "step": 94
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "grad_norm": 0.20713742077350616,
+      "learning_rate": 2e-05,
+      "loss": 0.0132,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "step": 97,
+      "total_flos": 2143940014768128.0,
+      "train_loss": 0.08341487873460829,
+      "train_runtime": 270.9164,
+      "train_samples_per_second": 1.432,
+      "train_steps_per_second": 0.358
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 97,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2143940014768128.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8f7f9d641a604368bf8d0e7f00091e0be9859ac869d4b8054b390fa12177e3f
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2d403dbdef0fd93809aadc60b9f7ae19c09b3cf80504dcaca4e989520caa280
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed0479ad680594c42187a2d37e97e45cb9ab21768df8b1d2ad47b75fc9363a3e
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c599dfa725ee23a478848ed8752f9bbe83b3dd481eb78ebf78eb6f1d05c68aac
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52c020b0f544305ea9115132a268be550f348fc8138874cca2695fad374b6040
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ac62e7a55b58292ccb5668dfb8b1e461a0c99f652b9a0735b19dc2923299501
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:478bf976c320e8c5c811bb0fede5b8ddae2aaeb213094fef2bad1f8edd4ae8ff
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3fcadb5bde76ead584df8fbc7c7fc89c48ca61b07f74e41f427c531412c6dd
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 97,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 0.07272805273532867,
+      "learning_rate": 2e-05,
+      "loss": 0.1917,
+      "step": 2
+    },
+    {
+      "epoch": 0.041237113402061855,
+      "grad_norm": 0.7147791385650635,
+      "learning_rate": 2e-05,
+      "loss": 0.7121,
+      "step": 4
+    },
+    {
+      "epoch": 0.061855670103092786,
+      "grad_norm": 3.7243456840515137,
+      "learning_rate": 2e-05,
+      "loss": 1.3252,
+      "step": 6
+    },
+    {
+      "epoch": 0.08247422680412371,
+      "grad_norm": 1.936738133430481,
+      "learning_rate": 2e-05,
+      "loss": 1.1401,
+      "step": 8
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 1.6566787958145142,
+      "learning_rate": 2e-05,
+      "loss": 0.7242,
+      "step": 10
+    },
+    {
+      "epoch": 0.12371134020618557,
+      "grad_norm": 2.212766408920288,
+      "learning_rate": 2e-05,
+      "loss": 0.4594,
+      "step": 12
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 0.575031042098999,
+      "learning_rate": 2e-05,
+      "loss": 0.4458,
+      "step": 14
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 0.3744853138923645,
+      "learning_rate": 2e-05,
+      "loss": 0.1401,
+      "step": 16
+    },
+    {
+      "epoch": 0.18556701030927836,
+      "grad_norm": 2.374159812927246,
+      "learning_rate": 2e-05,
+      "loss": 2.0944,
+      "step": 18
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 2.35343337059021,
+      "learning_rate": 2e-05,
+      "loss": 1.1046,
+      "step": 20
+    },
+    {
+      "epoch": 0.2268041237113402,
+      "grad_norm": 1.7141731977462769,
+      "learning_rate": 2e-05,
+      "loss": 0.4615,
+      "step": 22
+    },
+    {
+      "epoch": 0.24742268041237114,
+      "grad_norm": 0.3044542968273163,
+      "learning_rate": 2e-05,
+      "loss": 0.1688,
+      "step": 24
+    },
+    {
+      "epoch": 0.26804123711340205,
+      "grad_norm": 2.698765754699707,
+      "learning_rate": 2e-05,
+      "loss": 0.9064,
+      "step": 26
+    },
+    {
+      "epoch": 0.28865979381443296,
+      "grad_norm": 0.7965202927589417,
+      "learning_rate": 2e-05,
+      "loss": 0.1694,
+      "step": 28
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 1.8384367227554321,
+      "learning_rate": 2e-05,
+      "loss": 1.0756,
+      "step": 30
+    },
+    {
+      "epoch": 0.32989690721649484,
+      "grad_norm": 0.3519437909126282,
+      "learning_rate": 2e-05,
+      "loss": 0.2709,
+      "step": 32
+    },
+    {
+      "epoch": 0.35051546391752575,
+      "grad_norm": 2.9105238914489746,
+      "learning_rate": 2e-05,
+      "loss": 1.6573,
+      "step": 34
+    },
+    {
+      "epoch": 0.3711340206185567,
+      "grad_norm": 0.6958692669868469,
+      "learning_rate": 2e-05,
+      "loss": 0.3584,
+      "step": 36
+    },
+    {
+      "epoch": 0.3917525773195876,
+      "grad_norm": 0.4390473961830139,
+      "learning_rate": 2e-05,
+      "loss": 0.2845,
+      "step": 38
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 0.6055613160133362,
+      "learning_rate": 2e-05,
+      "loss": 0.3944,
+      "step": 40
+    },
+    {
+      "epoch": 0.4329896907216495,
+      "grad_norm": 0.7648600935935974,
+      "learning_rate": 2e-05,
+      "loss": 0.3706,
+      "step": 42
+    },
+    {
+      "epoch": 0.4536082474226804,
+      "grad_norm": 2.231801748275757,
+      "learning_rate": 2e-05,
+      "loss": 1.2567,
+      "step": 44
+    },
+    {
+      "epoch": 0.4742268041237113,
+      "grad_norm": 0.49553102254867554,
+      "learning_rate": 2e-05,
+      "loss": 0.2562,
+      "step": 46
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 0.6220466494560242,
+      "learning_rate": 2e-05,
+      "loss": 0.8419,
+      "step": 48
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 0.9273183941841125,
+      "learning_rate": 2e-05,
+      "loss": 0.3708,
+      "step": 50
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 1.8283573389053345,
+      "learning_rate": 2e-05,
+      "loss": 0.433,
+      "step": 52
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 1.1112104654312134,
+      "learning_rate": 2e-05,
+      "loss": 0.366,
+      "step": 54
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 0.7584994435310364,
+      "learning_rate": 2e-05,
+      "loss": 0.2889,
+      "step": 56
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 2.2555992603302,
+      "learning_rate": 2e-05,
+      "loss": 0.8371,
+      "step": 58
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 0.22240255773067474,
+      "learning_rate": 2e-05,
+      "loss": 0.3454,
+      "step": 60
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 0.875527024269104,
+      "learning_rate": 2e-05,
+      "loss": 0.6807,
+      "step": 62
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 1.1137892007827759,
+      "learning_rate": 2e-05,
+      "loss": 0.3103,
+      "step": 64
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 0.6312543749809265,
+      "learning_rate": 2e-05,
+      "loss": 0.4288,
+      "step": 66
+    },
+    {
+      "epoch": 0.7010309278350515,
+      "grad_norm": 0.4749165177345276,
+      "learning_rate": 2e-05,
+      "loss": 0.1926,
+      "step": 68
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 0.6268261671066284,
+      "learning_rate": 2e-05,
+      "loss": 0.3698,
+      "step": 70
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 2.9238345623016357,
+      "learning_rate": 2e-05,
+      "loss": 1.5883,
+      "step": 72
+    },
+    {
+      "epoch": 0.7628865979381443,
+      "grad_norm": 2.378034830093384,
+      "learning_rate": 2e-05,
+      "loss": 1.1422,
+      "step": 74
+    },
+    {
+      "epoch": 0.7835051546391752,
+      "grad_norm": 0.7543140053749084,
+      "learning_rate": 2e-05,
+      "loss": 0.4975,
+      "step": 76
+    },
+    {
+      "epoch": 0.8041237113402062,
+      "grad_norm": 2.226060152053833,
+      "learning_rate": 2e-05,
+      "loss": 0.4604,
+      "step": 78
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 0.058547962456941605,
+      "learning_rate": 2e-05,
+      "loss": 0.9361,
+      "step": 80
+    },
+    {
+      "epoch": 0.845360824742268,
+      "grad_norm": 0.3901329040527344,
+      "learning_rate": 2e-05,
+      "loss": 0.1839,
+      "step": 82
+    },
+    {
+      "epoch": 0.865979381443299,
+      "grad_norm": 0.6894042491912842,
+      "learning_rate": 2e-05,
+      "loss": 0.5983,
+      "step": 84
+    },
+    {
+      "epoch": 0.8865979381443299,
+      "grad_norm": 0.11256992816925049,
+      "learning_rate": 2e-05,
+      "loss": 1.1,
+      "step": 86
+    },
+    {
+      "epoch": 0.9072164948453608,
+      "grad_norm": 1.3978410959243774,
+      "learning_rate": 2e-05,
+      "loss": 0.7952,
+      "step": 88
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 1.851767659187317,
+      "learning_rate": 2e-05,
+      "loss": 0.7418,
+      "step": 90
+    },
+    {
+      "epoch": 0.9484536082474226,
+      "grad_norm": 0.6821492910385132,
+      "learning_rate": 2e-05,
+      "loss": 0.2547,
+      "step": 92
+    },
+    {
+      "epoch": 0.9690721649484536,
+      "grad_norm": 0.10563112795352936,
+      "learning_rate": 2e-05,
+      "loss": 0.1207,
+      "step": 94
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "grad_norm": 1.140236735343933,
+      "learning_rate": 2e-05,
+      "loss": 1.6027,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "step": 97,
+      "total_flos": 5055426831843328.0,
+      "train_loss": 0.6651097327163539,
+      "train_runtime": 434.3501,
+      "train_samples_per_second": 0.893,
+      "train_steps_per_second": 0.223
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 97,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5055426831843328.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b8821c785135aa7681f831baee4c8faa6da0bf34abcf61a74c878a10678fc5a
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18fa74d12e99610bc0ba01fcc22126ffaa1e7889f91ec44904c61c9a84bb0197
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:076e147af133bf2506e43a1eaf6d6c2f0188696b68debb8e594ea3dc13963f30
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:405a042f3c8bba329b51f7d518f0dfc03f6e32f11911d86934dd301ce82bb2f6
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c0be93ffc19967965abe2fef04a4f9b6c4b7bf10e0659f09f5704bbbc23cc55
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff9c1bac57727b36b4f19b0a5528833b27575970e5abb9481eef6061d65761d
+size 369838470

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:988261b37d63335deb0ab84f3c8d6341bbb6c1433375cf8e6799d997da572465
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4cf5a7e90df055317a883596df00673fbebe9169a545fa5a2123618e7c68207
+size 369837282

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 97,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 3.5025784969329834,
+      "learning_rate": 2e-05,
+      "loss": 0.6125,
+      "step": 2
+    },
+    {
+      "epoch": 0.041237113402061855,
+      "grad_norm": 1.6424089670181274,
+      "learning_rate": 2e-05,
+      "loss": 1.3566,
+      "step": 4
+    },
+    {
+      "epoch": 0.061855670103092786,
+      "grad_norm": 4.314863681793213,
+      "learning_rate": 2e-05,
+      "loss": 1.3257,
+      "step": 6
+    },
+    {
+      "epoch": 0.08247422680412371,
+      "grad_norm": 1.1254048347473145,
+      "learning_rate": 2e-05,
+      "loss": 2.5388,
+      "step": 8
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 1.760192632675171,
+      "learning_rate": 2e-05,
+      "loss": 1.3429,
+      "step": 10
+    },
+    {
+      "epoch": 0.12371134020618557,
+      "grad_norm": 2.004739761352539,
+      "learning_rate": 2e-05,
+      "loss": 1.2,
+      "step": 12
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 1.0539671182632446,
+      "learning_rate": 2e-05,
+      "loss": 0.6582,
+      "step": 14
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 4.009338855743408,
+      "learning_rate": 2e-05,
+      "loss": 1.0883,
+      "step": 16
+    },
+    {
+      "epoch": 0.18556701030927836,
+      "grad_norm": 4.920736312866211,
+      "learning_rate": 2e-05,
+      "loss": 0.8855,
+      "step": 18
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 2.5970003604888916,
+      "learning_rate": 2e-05,
+      "loss": 1.0158,
+      "step": 20
+    },
+    {
+      "epoch": 0.2268041237113402,
+      "grad_norm": 2.9520134925842285,
+      "learning_rate": 2e-05,
+      "loss": 0.726,
+      "step": 22
+    },
+    {
+      "epoch": 0.24742268041237114,
+      "grad_norm": 1.7787227630615234,
+      "learning_rate": 2e-05,
+      "loss": 1.116,
+      "step": 24
+    },
+    {
+      "epoch": 0.26804123711340205,
+      "grad_norm": 3.36810040473938,
+      "learning_rate": 2e-05,
+      "loss": 1.2355,
+      "step": 26
+    },
+    {
+      "epoch": 0.28865979381443296,
+      "grad_norm": 0.9433608651161194,
+      "learning_rate": 2e-05,
+      "loss": 0.6505,
+      "step": 28
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 2.1001124382019043,
+      "learning_rate": 2e-05,
+      "loss": 0.7527,
+      "step": 30
+    },
+    {
+      "epoch": 0.32989690721649484,
+      "grad_norm": 2.966327428817749,
+      "learning_rate": 2e-05,
+      "loss": 1.2433,
+      "step": 32
+    },
+    {
+      "epoch": 0.35051546391752575,
+      "grad_norm": 3.646400213241577,
+      "learning_rate": 2e-05,
+      "loss": 1.3473,
+      "step": 34
+    },
+    {
+      "epoch": 0.3711340206185567,
+      "grad_norm": 1.8998469114303589,
+      "learning_rate": 2e-05,
+      "loss": 0.7622,
+      "step": 36
+    },
+    {
+      "epoch": 0.3917525773195876,
+      "grad_norm": 0.9765092134475708,
+      "learning_rate": 2e-05,
+      "loss": 0.7605,
+      "step": 38
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 2.9249725341796875,
+      "learning_rate": 2e-05,
+      "loss": 0.8117,
+      "step": 40
+    },
+    {
+      "epoch": 0.4329896907216495,
+      "grad_norm": 1.4821889400482178,
+      "learning_rate": 2e-05,
+      "loss": 0.4398,
+      "step": 42
+    },
+    {
+      "epoch": 0.4536082474226804,
+      "grad_norm": 1.9430099725723267,
+      "learning_rate": 2e-05,
+      "loss": 0.6923,
+      "step": 44
+    },
+    {
+      "epoch": 0.4742268041237113,
+      "grad_norm": 1.3418391942977905,
+      "learning_rate": 2e-05,
+      "loss": 0.4962,
+      "step": 46
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 1.5497465133666992,
+      "learning_rate": 2e-05,
+      "loss": 0.7783,
+      "step": 48
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 4.256335258483887,
+      "learning_rate": 2e-05,
+      "loss": 1.774,
+      "step": 50
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 1.0099101066589355,
+      "learning_rate": 2e-05,
+      "loss": 1.0536,
+      "step": 52
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 1.356179118156433,
+      "learning_rate": 2e-05,
+      "loss": 0.9636,
+      "step": 54
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 4.572417259216309,
+      "learning_rate": 2e-05,
+      "loss": 1.398,
+      "step": 56
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 4.678338050842285,
+      "learning_rate": 2e-05,
+      "loss": 2.1435,
+      "step": 58
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 3.575714588165283,
+      "learning_rate": 2e-05,
+      "loss": 1.9645,
+      "step": 60
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 2.5277392864227295,
+      "learning_rate": 2e-05,
+      "loss": 0.6022,
+      "step": 62
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 3.385993003845215,
+      "learning_rate": 2e-05,
+      "loss": 0.7407,
+      "step": 64
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 5.360639572143555,
+      "learning_rate": 2e-05,
+      "loss": 2.6118,
+      "step": 66
+    },
+    {
+      "epoch": 0.7010309278350515,
+      "grad_norm": 6.285584926605225,
+      "learning_rate": 2e-05,
+      "loss": 1.1991,
+      "step": 68
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 3.385345697402954,
+      "learning_rate": 2e-05,
+      "loss": 1.0076,
+      "step": 70
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 2.7984771728515625,
+      "learning_rate": 2e-05,
+      "loss": 0.8717,
+      "step": 72
+    },
+    {
+      "epoch": 0.7628865979381443,
+      "grad_norm": 1.8088620901107788,
+      "learning_rate": 2e-05,
+      "loss": 1.9135,
+      "step": 74
+    },
+    {
+      "epoch": 0.7835051546391752,
+      "grad_norm": 1.2181267738342285,
+      "learning_rate": 2e-05,
+      "loss": 1.112,
+      "step": 76
+    },
+    {
+      "epoch": 0.8041237113402062,
+      "grad_norm": 2.389383316040039,
+      "learning_rate": 2e-05,
+      "loss": 0.896,
+      "step": 78
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 1.2696701288223267,
+      "learning_rate": 2e-05,
+      "loss": 0.5581,
+      "step": 80
+    },
+    {
+      "epoch": 0.845360824742268,
+      "grad_norm": 4.296065330505371,
+      "learning_rate": 2e-05,
+      "loss": 1.6235,
+      "step": 82
+    },
+    {
+      "epoch": 0.865979381443299,
+      "grad_norm": 5.560433387756348,
+      "learning_rate": 2e-05,
+      "loss": 1.7516,
+      "step": 84
+    },
+    {
+      "epoch": 0.8865979381443299,
+      "grad_norm": 1.6488584280014038,
+      "learning_rate": 2e-05,
+      "loss": 0.7037,
+      "step": 86
+    },
+    {
+      "epoch": 0.9072164948453608,
+      "grad_norm": 1.2957154512405396,
+      "learning_rate": 2e-05,
+      "loss": 0.5997,
+      "step": 88
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 2.889270305633545,
+      "learning_rate": 2e-05,
+      "loss": 1.9081,
+      "step": 90
+    },
+    {
+      "epoch": 0.9484536082474226,
+      "grad_norm": 3.077247381210327,
+      "learning_rate": 2e-05,
+      "loss": 0.8997,
+      "step": 92
+    },
+    {
+      "epoch": 0.9690721649484536,
+      "grad_norm": 4.6420979499816895,
+      "learning_rate": 2e-05,
+      "loss": 0.8319,
+      "step": 94
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "grad_norm": 2.454413890838623,
+      "learning_rate": 2e-05,
+      "loss": 0.9373,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "step": 97,
+      "total_flos": 2171896458117120.0,
+      "train_loss": 1.122876275445997,
+      "train_runtime": 270.0103,
+      "train_samples_per_second": 1.437,
+      "train_steps_per_second": 0.359
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 97,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2171896458117120.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:23387506e7ecf8f3251f0025829a56baa1bf2f769ac9bfa4fdcc90e01a070768
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0acdddfcb37d7b530feeec8001857fec2a659c79f2cf1f4c55c44a9f66e5ae63
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cede0a0fcd2006ffaa32ece4315df4e5adb2865e91e0f13e75df63d528db0cb2
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb53b8d130aba5244f53331294cf05558d5bfa3cc6c8190cc13b230d1d64a46
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c08a07e6cd3316c5ec6b2ccd96a9617354d3e87001a2805ad3d3e3f83f88291
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0592e6c0f8a48ad175749a2b569c9f722087166f0892e342960007825ba90a03
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4df776933e0bbc0cdc681ab9592b09d9fe10f92c523f0383b464947c0e593677
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f820f6c817a919f8f9d184c0b292b5664efef2f06579de05ae74f87c093aa64
+size 794706058

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,378 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 97,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 2.518491506576538,
+      "learning_rate": 2e-05,
+      "loss": 0.8582,
+      "step": 2
+    },
+    {
+      "epoch": 0.041237113402061855,
+      "grad_norm": 1.6445577144622803,
+      "learning_rate": 2e-05,
+      "loss": 1.1082,
+      "step": 4
+    },
+    {
+      "epoch": 0.061855670103092786,
+      "grad_norm": 1.276530146598816,
+      "learning_rate": 2e-05,
+      "loss": 0.9062,
+      "step": 6
+    },
+    {
+      "epoch": 0.08247422680412371,
+      "grad_norm": 1.4562397003173828,
+      "learning_rate": 2e-05,
+      "loss": 1.2294,
+      "step": 8
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 1.4720100164413452,
+      "learning_rate": 2e-05,
+      "loss": 0.4722,
+      "step": 10
+    },
+    {
+      "epoch": 0.12371134020618557,
+      "grad_norm": 0.8212082386016846,
+      "learning_rate": 2e-05,
+      "loss": 0.3804,
+      "step": 12
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 1.2318272590637207,
+      "learning_rate": 2e-05,
+      "loss": 0.4925,
+      "step": 14
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 1.513741374015808,
+      "learning_rate": 2e-05,
+      "loss": 0.9536,
+      "step": 16
+    },
+    {
+      "epoch": 0.18556701030927836,
+      "grad_norm": 0.5804106593132019,
+      "learning_rate": 2e-05,
+      "loss": 0.8147,
+      "step": 18
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 1.2517503499984741,
+      "learning_rate": 2e-05,
+      "loss": 0.4827,
+      "step": 20
+    },
+    {
+      "epoch": 0.2268041237113402,
+      "grad_norm": 0.6721848249435425,
+      "learning_rate": 2e-05,
+      "loss": 0.3412,
+      "step": 22
+    },
+    {
+      "epoch": 0.24742268041237114,
+      "grad_norm": 1.1781041622161865,
+      "learning_rate": 2e-05,
+      "loss": 0.872,
+      "step": 24
+    },
+    {
+      "epoch": 0.26804123711340205,
+      "grad_norm": 1.1887950897216797,
+      "learning_rate": 2e-05,
+      "loss": 0.5626,
+      "step": 26
+    },
+    {
+      "epoch": 0.28865979381443296,
+      "grad_norm": 1.4811246395111084,
+      "learning_rate": 2e-05,
+      "loss": 0.4312,
+      "step": 28
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 1.3950275182724,
+      "learning_rate": 2e-05,
+      "loss": 0.5554,
+      "step": 30
+    },
+    {
+      "epoch": 0.32989690721649484,
+      "grad_norm": 0.8101319670677185,
+      "learning_rate": 2e-05,
+      "loss": 0.2638,
+      "step": 32
+    },
+    {
+      "epoch": 0.35051546391752575,
+      "grad_norm": 1.3041068315505981,
+      "learning_rate": 2e-05,
+      "loss": 1.154,
+      "step": 34
+    },
+    {
+      "epoch": 0.3711340206185567,
+      "grad_norm": 1.0701684951782227,
+      "learning_rate": 2e-05,
+      "loss": 0.7192,
+      "step": 36
+    },
+    {
+      "epoch": 0.3917525773195876,
+      "grad_norm": 1.6231181621551514,
+      "learning_rate": 2e-05,
+      "loss": 0.7025,
+      "step": 38
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 1.8748000860214233,
+      "learning_rate": 2e-05,
+      "loss": 1.6573,
+      "step": 40
+    },
+    {
+      "epoch": 0.4329896907216495,
+      "grad_norm": 0.7263919711112976,
+      "learning_rate": 2e-05,
+      "loss": 0.8162,
+      "step": 42
+    },
+    {
+      "epoch": 0.4536082474226804,
+      "grad_norm": 1.2520265579223633,
+      "learning_rate": 2e-05,
+      "loss": 0.8266,
+      "step": 44
+    },
+    {
+      "epoch": 0.4742268041237113,
+      "grad_norm": 0.34067195653915405,
+      "learning_rate": 2e-05,
+      "loss": 0.4451,
+      "step": 46
+    },
+    {
+      "epoch": 0.4948453608247423,
+      "grad_norm": 1.4540058374404907,
+      "learning_rate": 2e-05,
+      "loss": 0.6771,
+      "step": 48
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 1.0151292085647583,
+      "learning_rate": 2e-05,
+      "loss": 0.8007,
+      "step": 50
+    },
+    {
+      "epoch": 0.5360824742268041,
+      "grad_norm": 1.1358588933944702,
+      "learning_rate": 2e-05,
+      "loss": 0.2688,
+      "step": 52
+    },
+    {
+      "epoch": 0.5567010309278351,
+      "grad_norm": 0.9416270852088928,
+      "learning_rate": 2e-05,
+      "loss": 0.431,
+      "step": 54
+    },
+    {
+      "epoch": 0.5773195876288659,
+      "grad_norm": 1.288041591644287,
+      "learning_rate": 2e-05,
+      "loss": 0.3753,
+      "step": 56
+    },
+    {
+      "epoch": 0.5979381443298969,
+      "grad_norm": 0.13528066873550415,
+      "learning_rate": 2e-05,
+      "loss": 0.1003,
+      "step": 58
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 1.8311398029327393,
+      "learning_rate": 2e-05,
+      "loss": 0.8292,
+      "step": 60
+    },
+    {
+      "epoch": 0.6391752577319587,
+      "grad_norm": 2.2910356521606445,
+      "learning_rate": 2e-05,
+      "loss": 0.7834,
+      "step": 62
+    },
+    {
+      "epoch": 0.6597938144329897,
+      "grad_norm": 0.40395867824554443,
+      "learning_rate": 2e-05,
+      "loss": 0.5579,
+      "step": 64
+    },
+    {
+      "epoch": 0.6804123711340206,
+      "grad_norm": 0.6555685997009277,
+      "learning_rate": 2e-05,
+      "loss": 1.3677,
+      "step": 66
+    },
+    {
+      "epoch": 0.7010309278350515,
+      "grad_norm": 0.7282531261444092,
+      "learning_rate": 2e-05,
+      "loss": 0.8838,
+      "step": 68
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 1.5124473571777344,
+      "learning_rate": 2e-05,
+      "loss": 0.5091,
+      "step": 70
+    },
+    {
+      "epoch": 0.7422680412371134,
+      "grad_norm": 1.059186339378357,
+      "learning_rate": 2e-05,
+      "loss": 0.228,
+      "step": 72
+    },
+    {
+      "epoch": 0.7628865979381443,
+      "grad_norm": 1.1664392948150635,
+      "learning_rate": 2e-05,
+      "loss": 0.641,
+      "step": 74
+    },
+    {
+      "epoch": 0.7835051546391752,
+      "grad_norm": 0.772824764251709,
+      "learning_rate": 2e-05,
+      "loss": 0.1895,
+      "step": 76
+    },
+    {
+      "epoch": 0.8041237113402062,
+      "grad_norm": 0.9583086371421814,
+      "learning_rate": 2e-05,
+      "loss": 0.5631,
+      "step": 78
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 2.7325258255004883,
+      "learning_rate": 2e-05,
+      "loss": 1.264,
+      "step": 80
+    },
+    {
+      "epoch": 0.845360824742268,
+      "grad_norm": 0.539401650428772,
+      "learning_rate": 2e-05,
+      "loss": 0.22,
+      "step": 82
+    },
+    {
+      "epoch": 0.865979381443299,
+      "grad_norm": 1.19405996799469,
+      "learning_rate": 2e-05,
+      "loss": 0.6883,
+      "step": 84
+    },
+    {
+      "epoch": 0.8865979381443299,
+      "grad_norm": 1.0464004278182983,
+      "learning_rate": 2e-05,
+      "loss": 0.5503,
+      "step": 86
+    },
+    {
+      "epoch": 0.9072164948453608,
+      "grad_norm": 2.1325461864471436,
+      "learning_rate": 2e-05,
+      "loss": 1.0865,
+      "step": 88
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 2.034447431564331,
+      "learning_rate": 2e-05,
+      "loss": 0.6971,
+      "step": 90
+    },
+    {
+      "epoch": 0.9484536082474226,
+      "grad_norm": 1.3602426052093506,
+      "learning_rate": 2e-05,
+      "loss": 1.1031,
+      "step": 92
+    },
+    {
+      "epoch": 0.9690721649484536,
+      "grad_norm": 0.8754698634147644,
+      "learning_rate": 2e-05,
+      "loss": 0.374,
+      "step": 94
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "grad_norm": 1.953580617904663,
+      "learning_rate": 2e-05,
+      "loss": 0.7177,
+      "step": 96
+    },
+    {
+      "epoch": 1.0,
+      "step": 97,
+      "total_flos": 5852858146619392.0,
+      "train_loss": 0.6841668984324661,
+      "train_runtime": 435.2797,
+      "train_samples_per_second": 0.891,
+      "train_steps_per_second": 0.223
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 97,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5852858146619392.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:872bf8692b034370c1c4c0f005922a1a8759668f6bd24ab1b7a8b31e15575065
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:136f7c89ceb0d2c2c8c383833ddae35cdb2e3237ee03fcb28b38f411bfd09a50
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b964fe05e5bbbea46bfb3de2d1319ba929f4bbf38e97ba47c990b8674af5305d
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adf2d32495d65af59172b9ad8c17f0e066399ac7f6494c618e3b788278339274
+size 794708086

client_states_feddualMultipqfullfreeze_homoAgg_moe_NOCONT_bs4_saveoptim_lr2e-5_sc315_4tasks_5rounds_fixitr97_T0125_decay099/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88ffa0b5cd6e5c1d97b79db546d8c18cacf42c828b5cba45f2b950fea5897c56
+size 794706058