thkim0305
/

feddat_baselines

Model card Files Files and versions Community

thkim0305 commited on about 1 month ago

Commit

b069aa5

verified ·

1 Parent(s): 54b6f9d

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth +3 -0

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fe1dbc65f417209d09344d4e4995df5d78c5eef26666b716f886d90d26de53b
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0166898c5d59908d3d4e636b1e8705ab438946e36b7f214e4c120674652f0647
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:748f75b79de639a2a4af53a70cce66a8ca2082d031f11a4e62a32187924c4242
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf98a0fa6bcb6b3f4639a9d3ce8d0d4bc9f306fc5c94fe909c335ed5961f3391
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d679df9ca797825f3a82c1c09077f75eeace1c8de08ab30c5fa922069db709e1
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b197e79ebb6d8ae64ec6739b5cbf3cb2c42e1e175ddd75ee657f36a5e8f23cd
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efc8ee78f3b179428383f77c08c7e1005b657efa43a78e8a1acbfab15a6f4244
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0b0c72c61440e9ae802fcad9652d631f5f56aa289e047e253f7ee81d8019dfe
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.7212343215942383,
+      "learning_rate": 2e-05,
+      "loss": 0.0439,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 11.383496284484863,
+      "learning_rate": 2e-05,
+      "loss": 0.5268,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.927216112613678,
+      "learning_rate": 2e-05,
+      "loss": 0.337,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 3.203252077102661,
+      "learning_rate": 2e-05,
+      "loss": 0.3553,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.08366145938634872,
+      "learning_rate": 2e-05,
+      "loss": 0.0917,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 6.82559061050415,
+      "learning_rate": 2e-05,
+      "loss": 0.6837,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 1.8658998012542725,
+      "learning_rate": 2e-05,
+      "loss": 0.0646,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 1.4391404390335083,
+      "learning_rate": 2e-05,
+      "loss": 0.0581,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.4982571601867676,
+      "learning_rate": 2e-05,
+      "loss": 0.3933,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.6830452680587769,
+      "learning_rate": 2e-05,
+      "loss": 0.1075,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.245327427983284,
+      "learning_rate": 2e-05,
+      "loss": 0.0312,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 10.72057056427002,
+      "learning_rate": 2e-05,
+      "loss": 0.38,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 1.239396095275879,
+      "learning_rate": 2e-05,
+      "loss": 0.0602,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.4094916880130768,
+      "learning_rate": 2e-05,
+      "loss": 0.0428,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 1.428717851638794,
+      "learning_rate": 2e-05,
+      "loss": 0.6015,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.11368861049413681,
+      "learning_rate": 2e-05,
+      "loss": 0.1259,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 1.6821730136871338,
+      "learning_rate": 2e-05,
+      "loss": 0.1679,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 7.368831157684326,
+      "learning_rate": 2e-05,
+      "loss": 0.3051,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.640942096710205,
+      "learning_rate": 2e-05,
+      "loss": 0.1196,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.3502234220504761,
+      "learning_rate": 2e-05,
+      "loss": 0.1621,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 2.8279569149017334,
+      "learning_rate": 2e-05,
+      "loss": 0.4888,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.6382105946540833,
+      "learning_rate": 2e-05,
+      "loss": 0.1357,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 0.40106961131095886,
+      "learning_rate": 2e-05,
+      "loss": 0.3606,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 5.146971702575684,
+      "learning_rate": 2e-05,
+      "loss": 0.1506,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.3846442997455597,
+      "learning_rate": 2e-05,
+      "loss": 0.0316,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5266647493378048.0,
+      "train_loss": 0.23302085906267167,
+      "train_runtime": 198.109,
+      "train_samples_per_second": 1.01,
+      "train_steps_per_second": 0.252
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5266647493378048.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c2aec2a71a2298864fb9cb71bbe157edfe31cb48c8758ed137adf555b57706c
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90b0271eee7daa4e6cf36df93827f4dd84b51c02fbc7eeb354a4584bc9eecc1c
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c319587b2f08331b839f2419acb0e4bfacc9c57991286ab2a77ea96d9feb5cbc
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bb6d41e065f1080f20435a320c1edfe1f3c8cba9f031e54d7610b9e367f8a9
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f72227369a7df43e68a0bb4081030172e220490ef080b56abc68a7bdce46ea6
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a435b97115a2feb785df092f369b70fa82f9fb72cf73f47bd559d40fa9b5a2a
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0390a2236b53f82449b5243e68ffc4c185e44bc9517abac645cbedc5678052c9
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecfbee4594e86c27822bb6a0d822dc075ba4815540812516b3e99f0069448cce
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.2633078396320343,
+      "learning_rate": 2e-05,
+      "loss": 0.0962,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 18.888702392578125,
+      "learning_rate": 2e-05,
+      "loss": 0.2108,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 5.582844257354736,
+      "learning_rate": 2e-05,
+      "loss": 0.401,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.3722727596759796,
+      "learning_rate": 2e-05,
+      "loss": 0.0165,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.6710987687110901,
+      "learning_rate": 2e-05,
+      "loss": 0.0204,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.009970537386834621,
+      "learning_rate": 2e-05,
+      "loss": 0.0149,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.994692862033844,
+      "learning_rate": 2e-05,
+      "loss": 0.0285,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.025523852556943893,
+      "learning_rate": 2e-05,
+      "loss": 0.015,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.12230795621871948,
+      "learning_rate": 2e-05,
+      "loss": 0.1153,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.009217753075063229,
+      "learning_rate": 2e-05,
+      "loss": 0.0512,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 2.2184362411499023,
+      "learning_rate": 2e-05,
+      "loss": 0.1586,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 7.529627323150635,
+      "learning_rate": 2e-05,
+      "loss": 0.164,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 1.4156068563461304,
+      "learning_rate": 2e-05,
+      "loss": 0.1002,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.2617127001285553,
+      "learning_rate": 2e-05,
+      "loss": 0.0166,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.015605290420353413,
+      "learning_rate": 2e-05,
+      "loss": 0.0145,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.057093504816293716,
+      "learning_rate": 2e-05,
+      "loss": 0.015,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 0.13609035313129425,
+      "learning_rate": 2e-05,
+      "loss": 0.0151,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.03114943951368332,
+      "learning_rate": 2e-05,
+      "loss": 0.1649,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 0.026129912585020065,
+      "learning_rate": 2e-05,
+      "loss": 0.0145,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.42378631234169006,
+      "learning_rate": 2e-05,
+      "loss": 0.0229,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 1.1538219451904297,
+      "learning_rate": 2e-05,
+      "loss": 0.0286,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.1634211540222168,
+      "learning_rate": 2e-05,
+      "loss": 0.0363,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 0.014446967281401157,
+      "learning_rate": 2e-05,
+      "loss": 0.0142,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.0895637720823288,
+      "learning_rate": 2e-05,
+      "loss": 0.0152,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.36212360858917236,
+      "learning_rate": 2e-05,
+      "loss": 0.0169,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5303430465716224.0,
+      "train_loss": 0.07068853974342346,
+      "train_runtime": 198.2611,
+      "train_samples_per_second": 1.009,
+      "train_steps_per_second": 0.252
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5303430465716224.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc38d3fec8aaf304a73dc0465221009621d12e35fe11437da6cdd8c90b9648e8
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4af31ee51a64d1dc941abff726f2d86b9aed76733dbd2124a460acb8f7fc752
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6760ac39e030ae9fa6ebc136bdc504c2239d72745f5b973ed81a1e0decdfc048
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e5eebbd87377d275d69626cccc34474e06e8611ca75b57099b23a759cb721b7
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1198bde519b9e1569c1f5d34b7cd75ade0f1ccdede78bf1e1d666c2185198bf0
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0db7977fb93fec26d6d524d6bb78a926367418764df5ead8d7437c724c9d30d9
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e377ed9ce274f12fa63914553e2b4dd7b85b45fcaba55bde1fefdb0aee6e0f
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:154985bd935d4f05b0381b48c6606a5ef06bdc0053cdc7537f2dc22e5dc37771
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.617075204849243,
+      "learning_rate": 2e-05,
+      "loss": 0.1032,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.15696358680725098,
+      "learning_rate": 2e-05,
+      "loss": 0.1814,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 5.014780521392822,
+      "learning_rate": 2e-05,
+      "loss": 0.761,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 9.337811470031738,
+      "learning_rate": 2e-05,
+      "loss": 1.8625,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 6.394294261932373,
+      "learning_rate": 2e-05,
+      "loss": 0.6403,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.694501519203186,
+      "learning_rate": 2e-05,
+      "loss": 0.2702,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 27.820241928100586,
+      "learning_rate": 2e-05,
+      "loss": 0.3551,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 2.758286476135254,
+      "learning_rate": 2e-05,
+      "loss": 0.4164,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 1.9265002012252808,
+      "learning_rate": 2e-05,
+      "loss": 0.2844,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.5484347343444824,
+      "learning_rate": 2e-05,
+      "loss": 0.3019,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 2.66292405128479,
+      "learning_rate": 2e-05,
+      "loss": 0.2797,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 3.7674098014831543,
+      "learning_rate": 2e-05,
+      "loss": 0.404,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 7.080770969390869,
+      "learning_rate": 2e-05,
+      "loss": 0.4917,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 3.9330906867980957,
+      "learning_rate": 2e-05,
+      "loss": 0.3915,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.2564377784729004,
+      "learning_rate": 2e-05,
+      "loss": 0.242,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 1.0977956056594849,
+      "learning_rate": 2e-05,
+      "loss": 0.0854,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 4.018260478973389,
+      "learning_rate": 2e-05,
+      "loss": 0.5377,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 9.991900444030762,
+      "learning_rate": 2e-05,
+      "loss": 0.5558,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 1.1954278945922852,
+      "learning_rate": 2e-05,
+      "loss": 0.6506,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.9688401818275452,
+      "learning_rate": 2e-05,
+      "loss": 0.1332,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 5.815992832183838,
+      "learning_rate": 2e-05,
+      "loss": 0.3631,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 2.7181556224823,
+      "learning_rate": 2e-05,
+      "loss": 0.2599,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 4.442654132843018,
+      "learning_rate": 2e-05,
+      "loss": 0.191,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 7.31368350982666,
+      "learning_rate": 2e-05,
+      "loss": 0.4697,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 7.1462225914001465,
+      "learning_rate": 2e-05,
+      "loss": 0.3045,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5195491428007936.0,
+      "train_loss": 0.42144030570983887,
+      "train_runtime": 199.465,
+      "train_samples_per_second": 1.003,
+      "train_steps_per_second": 0.251
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5195491428007936.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7fc4f7efb25926bfa826673aa01335f815c127661a07f3cbb64a95c8fdffb40
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48b399960eea88ae61a0a1aa3dfd3c1813abcdad91c1b4224ea8933067755a06
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89943c600b2b161ec6014448b900c8adf846847c4b9f68ffbf21de31f5e5d32c
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5699250baf94f3019eaeb1ef41db0f18f6dfd3e0c3bd0a112b411bd4170e0eb1
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:472fbc1f43c917af0bb1f5e94c2286225b782951b640924fbaa37c723473a9ad
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c0e5b108c6b221edf19db70fe14233bf9b9b7422803f13e1c304e11f6e5911a
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c149fa6c092ba05bb37700e485f6699abc82de896528610d1635a195fc9c7a0
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a59d5ce425a0645c183f6974a7833ce97c3e20569b720d8dec7176fff0fd0186
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 8.167108535766602,
+      "learning_rate": 2e-05,
+      "loss": 0.7666,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.970125138759613,
+      "learning_rate": 2e-05,
+      "loss": 0.4831,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 6.168637275695801,
+      "learning_rate": 2e-05,
+      "loss": 0.3263,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 4.275420188903809,
+      "learning_rate": 2e-05,
+      "loss": 0.4419,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 7.258407115936279,
+      "learning_rate": 2e-05,
+      "loss": 0.7066,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 5.875009059906006,
+      "learning_rate": 2e-05,
+      "loss": 0.8123,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 3.180713653564453,
+      "learning_rate": 2e-05,
+      "loss": 0.2229,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 1.822489619255066,
+      "learning_rate": 2e-05,
+      "loss": 0.245,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 8.324261665344238,
+      "learning_rate": 2e-05,
+      "loss": 0.6389,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 4.086201190948486,
+      "learning_rate": 2e-05,
+      "loss": 0.1874,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 4.469182968139648,
+      "learning_rate": 2e-05,
+      "loss": 0.6376,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 9.68209171295166,
+      "learning_rate": 2e-05,
+      "loss": 0.4976,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 15.532666206359863,
+      "learning_rate": 2e-05,
+      "loss": 1.6447,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 8.273395538330078,
+      "learning_rate": 2e-05,
+      "loss": 0.3491,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 7.468406677246094,
+      "learning_rate": 2e-05,
+      "loss": 0.6633,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 8.63184642791748,
+      "learning_rate": 2e-05,
+      "loss": 0.8022,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 12.6749267578125,
+      "learning_rate": 2e-05,
+      "loss": 0.7509,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 4.348174571990967,
+      "learning_rate": 2e-05,
+      "loss": 0.2364,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.658010005950928,
+      "learning_rate": 2e-05,
+      "loss": 0.5582,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.064706802368164,
+      "learning_rate": 2e-05,
+      "loss": 0.7061,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 15.28968334197998,
+      "learning_rate": 2e-05,
+      "loss": 1.1366,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 4.18186092376709,
+      "learning_rate": 2e-05,
+      "loss": 0.3874,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 2.199023723602295,
+      "learning_rate": 2e-05,
+      "loss": 0.4409,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 7.054344177246094,
+      "learning_rate": 2e-05,
+      "loss": 0.9599,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 2.990053653717041,
+      "learning_rate": 2e-05,
+      "loss": 0.3781,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5376319969820672.0,
+      "train_loss": 0.599202663898468,
+      "train_runtime": 198.5307,
+      "train_samples_per_second": 1.007,
+      "train_steps_per_second": 0.252
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5376319969820672.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9eb75c8782194720b59887465c995e11d72f78c8aca656c42c9b469a59124a9
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b39a4efe80d568e273b5a623ab5c108b7e27d937316250436a587da562ac3d86
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09f0af7ab102fbed3a844c53a4f7dcc5f21677451244983cab0caf1c747d76f5
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18987b20b7bbc6a88d60b2b91648ad3a08b577918fa565e2278a750aee69cc6d
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bd496e40f9341b403fe665f4a3f2261ae72e39294e7a4678261659e562606ce
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8d9e327003fd39d6bdcc0eabd84abc83f519cb69d6abde49f8d1544c15881a6
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d90208da0627322e15237fe2b0bc8fccf40d8472da50b806d3941b4128ce6b8
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06dc84e7ab1330bf113bc7de877ec9cedc283cb04d0aa6de93816d56ad6d6cbe
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 4.40165376663208,
+      "learning_rate": 2e-05,
+      "loss": 0.3787,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 3.2695400714874268,
+      "learning_rate": 2e-05,
+      "loss": 0.6385,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 2.2764341831207275,
+      "learning_rate": 2e-05,
+      "loss": 0.3251,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 2.679776191711426,
+      "learning_rate": 2e-05,
+      "loss": 0.3256,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 2.267791748046875,
+      "learning_rate": 2e-05,
+      "loss": 0.2853,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 3.7516863346099854,
+      "learning_rate": 2e-05,
+      "loss": 0.3355,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 4.205787181854248,
+      "learning_rate": 2e-05,
+      "loss": 0.3998,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 6.004105567932129,
+      "learning_rate": 2e-05,
+      "loss": 0.371,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 4.294463157653809,
+      "learning_rate": 2e-05,
+      "loss": 0.4596,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.1517653465270996,
+      "learning_rate": 2e-05,
+      "loss": 0.403,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 3.2551352977752686,
+      "learning_rate": 2e-05,
+      "loss": 0.3488,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.0919487476348877,
+      "learning_rate": 2e-05,
+      "loss": 0.5908,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 2.9000701904296875,
+      "learning_rate": 2e-05,
+      "loss": 0.4376,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.4982464909553528,
+      "learning_rate": 2e-05,
+      "loss": 0.2203,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.5159378051757812,
+      "learning_rate": 2e-05,
+      "loss": 0.4652,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 1.462558627128601,
+      "learning_rate": 2e-05,
+      "loss": 0.2046,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.924109935760498,
+      "learning_rate": 2e-05,
+      "loss": 0.8077,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 3.89208722114563,
+      "learning_rate": 2e-05,
+      "loss": 0.3069,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 1.9117887020111084,
+      "learning_rate": 2e-05,
+      "loss": 0.2479,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 4.854358673095703,
+      "learning_rate": 2e-05,
+      "loss": 0.3298,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 4.46043586730957,
+      "learning_rate": 2e-05,
+      "loss": 0.2984,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 7.219895362854004,
+      "learning_rate": 2e-05,
+      "loss": 0.6064,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 1.80936861038208,
+      "learning_rate": 2e-05,
+      "loss": 0.2453,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 2.554882287979126,
+      "learning_rate": 2e-05,
+      "loss": 0.1651,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.7274348735809326,
+      "learning_rate": 2e-05,
+      "loss": 0.4726,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 6021332957396992.0,
+      "train_loss": 0.38677083015441893,
+      "train_runtime": 198.1718,
+      "train_samples_per_second": 1.009,
+      "train_steps_per_second": 0.252
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6021332957396992.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f0509b8ce6cd3ed751cfef1e396c7179a3f9088099d5cd2cb5b457b319dcebb
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f467a053337fecfa0f847cbc238ac4d4c5e851c9947c49765a915ebdf2da5f08
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71a9446baf72a937c51e8e31dc17acd86d9cf2e6189c9082f6d32dc45a43491c
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7becdb1ad086320eb862ea9500d9bc7b91dd4e237a2a94fcd3b992d0c97c116b
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc310_4tasks_5rounds_fixitr50_T0125_decay099/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59ec73292fd4cb63ea34768525c152e88f9998ee0c947f7d860eae16ddd53f7c
+size 778341034