thkim0305
/

feddat_baselines

Model card Files Files and versions Community

thkim0305 commited on 10 days ago

Commit

0d658b8

verified ·

1 Parent(s): b6876a4

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json +217 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth +3 -0
client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth +3 -0

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d15399f0edd1f91b4a2f47beace1738d4cf8d5dfaf2b45b7305d79b9bbfb21d
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:428d26a3c9c1a279a65ec5b0271ef9313bcd1f8fbe7136f6fb36e13a611444be
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05cb0bc452be8c9b338be41388d1f2439111df0c7b8092f1ba38101984a33837
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7b33073b2d4e01b918ce6d556504297b87242a7269ecd544dd4a98da48dc7b7
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f18e59a96390eb2de1672fb33aea5d2558b0f3efc4ddd9f390822d557c52c42
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cba7ce9936a84aad1f88255fc48671eccf33c457c95b6877beedce99407c7636
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ca8e4bf48ae2c4fadc9a0324efef2e5fec14ec7cfeeccc0ac29b6813d8bb1b1
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54258bf82a4d6ed0bdd02ff79c8dce338184fbab57262563baae4a52792eb45c
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/0_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 20.356369018554688,
+      "learning_rate": 2e-05,
+      "loss": 1.581,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 8.037093162536621,
+      "learning_rate": 2e-05,
+      "loss": 0.3628,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 9.684240341186523,
+      "learning_rate": 2e-05,
+      "loss": 0.4963,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 13.099488258361816,
+      "learning_rate": 2e-05,
+      "loss": 0.5486,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 24.806184768676758,
+      "learning_rate": 2e-05,
+      "loss": 1.2746,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 12.736031532287598,
+      "learning_rate": 2e-05,
+      "loss": 1.4477,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 8.170167922973633,
+      "learning_rate": 2e-05,
+      "loss": 0.2535,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 6.432157039642334,
+      "learning_rate": 2e-05,
+      "loss": 0.1452,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 2.9350321292877197,
+      "learning_rate": 2e-05,
+      "loss": 0.4796,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 5.1196184158325195,
+      "learning_rate": 2e-05,
+      "loss": 0.5928,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 8.371356964111328,
+      "learning_rate": 2e-05,
+      "loss": 0.5357,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 10.118005752563477,
+      "learning_rate": 2e-05,
+      "loss": 0.5303,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 8.173060417175293,
+      "learning_rate": 2e-05,
+      "loss": 0.5055,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 8.020771980285645,
+      "learning_rate": 2e-05,
+      "loss": 0.2534,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.0385186672210693,
+      "learning_rate": 2e-05,
+      "loss": 0.3931,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 6.484845161437988,
+      "learning_rate": 2e-05,
+      "loss": 0.5293,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.5226542949676514,
+      "learning_rate": 2e-05,
+      "loss": 0.1518,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 3.8682730197906494,
+      "learning_rate": 2e-05,
+      "loss": 0.1667,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 15.682740211486816,
+      "learning_rate": 2e-05,
+      "loss": 0.9048,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.051661252975464,
+      "learning_rate": 2e-05,
+      "loss": 0.2347,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 4.373608112335205,
+      "learning_rate": 2e-05,
+      "loss": 0.8616,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.7194321155548096,
+      "learning_rate": 2e-05,
+      "loss": 0.1839,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 8.273662567138672,
+      "learning_rate": 2e-05,
+      "loss": 0.2419,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 4.200174808502197,
+      "learning_rate": 2e-05,
+      "loss": 0.1707,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 4.162522315979004,
+      "learning_rate": 2e-05,
+      "loss": 0.4122,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2184907602264064.0,
+      "train_loss": 0.530304090976715,
+      "train_runtime": 112.0178,
+      "train_samples_per_second": 1.785,
+      "train_steps_per_second": 0.446
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2184907602264064.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e0290f254e1384dc1b37b17cd1a2fb8a4a8bf7a62e1995251d0c059fe3036a
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6f460736d1f5f9bcd282f631bd6fb842444343cf3d88d95d4f800b704aa5da1
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01463999f5efe8d57300fee5391f52db0442384ea286c5d9d33b02743486a83d
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c68012f74213cbac7a5c1607e2e1e0f117a9c05b2885cc5b05407538fb77bc2f
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3fce46215518ed15bbadbfadf2e2c58fe601c749ff93d6fcb857c43f5208f1
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4158de6adb3088e4e92a8742807ff01855816aa9185bd87b5850e634ea97bb34
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4152f5720d2ccf17af9d3d343bd5f35a6af271f91a7685a8af2139b6784fdfdc
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55a2902e50a5d007c58fc2031cd980d500510dba91d1f562e637011185bec243
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/1_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.7138079404830933,
+      "learning_rate": 2e-05,
+      "loss": 0.1539,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.025427162647247314,
+      "learning_rate": 2e-05,
+      "loss": 0.0094,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.03731539845466614,
+      "learning_rate": 2e-05,
+      "loss": 0.0288,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 4.670400619506836,
+      "learning_rate": 2e-05,
+      "loss": 0.0634,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.008197980001568794,
+      "learning_rate": 2e-05,
+      "loss": 0.0131,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.292601466178894,
+      "learning_rate": 2e-05,
+      "loss": 0.0206,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 1.6769487857818604,
+      "learning_rate": 2e-05,
+      "loss": 0.2863,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.16871647536754608,
+      "learning_rate": 2e-05,
+      "loss": 0.0143,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 16.038557052612305,
+      "learning_rate": 2e-05,
+      "loss": 0.2802,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.5586574077606201,
+      "learning_rate": 2e-05,
+      "loss": 0.0268,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.052973054349422455,
+      "learning_rate": 2e-05,
+      "loss": 0.013,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.4838956296443939,
+      "learning_rate": 2e-05,
+      "loss": 0.0118,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 2.2010490894317627,
+      "learning_rate": 2e-05,
+      "loss": 0.0197,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.02648143470287323,
+      "learning_rate": 2e-05,
+      "loss": 0.0095,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.015569723211228848,
+      "learning_rate": 2e-05,
+      "loss": 0.1625,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.07271519303321838,
+      "learning_rate": 2e-05,
+      "loss": 0.0134,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 12.36640739440918,
+      "learning_rate": 2e-05,
+      "loss": 0.1975,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.011994317173957825,
+      "learning_rate": 2e-05,
+      "loss": 0.0186,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 0.07830987125635147,
+      "learning_rate": 2e-05,
+      "loss": 0.0099,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.245117649435997,
+      "learning_rate": 2e-05,
+      "loss": 0.0106,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 0.09306483715772629,
+      "learning_rate": 2e-05,
+      "loss": 0.0116,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.065843105316162,
+      "learning_rate": 2e-05,
+      "loss": 0.0152,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 0.019034242257475853,
+      "learning_rate": 2e-05,
+      "loss": 0.0096,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.014849173836410046,
+      "learning_rate": 2e-05,
+      "loss": 0.0973,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.41532012820243835,
+      "learning_rate": 2e-05,
+      "loss": 0.0126,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2203723937873920.0,
+      "train_loss": 0.060387180894613264,
+      "train_runtime": 113.8328,
+      "train_samples_per_second": 1.757,
+      "train_steps_per_second": 0.439
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2203723937873920.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55e9d3fd73bbdcad52830e269e2f3893da64867291b0aee9a0e4955d33c75627
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a85b9f1aa53edaad8c7a91bca1a3c70f20eb871f8dd521b068458c7141fc76c
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87a567a7777a1278ff501e2a4345b52e9eb9aeb5e0a651e43d5f8898fd830e16
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8291d8f62e0d8059d91b1002ed6f1e1830c888064a6e305bc691db4e3a7c5be
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d467059117666b4bb77ae3dfce1433137ba25364531e1f1648c6de90643de6cd
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c86f50e0f4809ed2809088768d554284ed9990868438e53b93eed85ac993822a
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0199d3ec64caff6ce480c8099992a2541bbee8a3b53ba12c681a7a19ab123831
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b7717a34391c3b362458f2148b4270e4a5682bd69eda667d4aa4d3b9e4ad72e
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/2_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 8.67927360534668,
+      "learning_rate": 2e-05,
+      "loss": 0.5073,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 1.070449709892273,
+      "learning_rate": 2e-05,
+      "loss": 0.0556,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 5.557805061340332,
+      "learning_rate": 2e-05,
+      "loss": 0.179,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 5.5926971435546875,
+      "learning_rate": 2e-05,
+      "loss": 0.4056,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 3.1736576557159424,
+      "learning_rate": 2e-05,
+      "loss": 0.137,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.38901424407959,
+      "learning_rate": 2e-05,
+      "loss": 0.3255,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 2.2440390586853027,
+      "learning_rate": 2e-05,
+      "loss": 0.4894,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 2.3557586669921875,
+      "learning_rate": 2e-05,
+      "loss": 0.424,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 2.672835350036621,
+      "learning_rate": 2e-05,
+      "loss": 0.272,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 2.5702593326568604,
+      "learning_rate": 2e-05,
+      "loss": 0.2785,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.3932490646839142,
+      "learning_rate": 2e-05,
+      "loss": 0.0989,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 4.769493579864502,
+      "learning_rate": 2e-05,
+      "loss": 0.254,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 5.8169331550598145,
+      "learning_rate": 2e-05,
+      "loss": 0.3336,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.9339334964752197,
+      "learning_rate": 2e-05,
+      "loss": 0.1087,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 4.087733745574951,
+      "learning_rate": 2e-05,
+      "loss": 0.1163,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 7.105349063873291,
+      "learning_rate": 2e-05,
+      "loss": 0.7171,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 2.8530874252319336,
+      "learning_rate": 2e-05,
+      "loss": 0.2644,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 6.338785648345947,
+      "learning_rate": 2e-05,
+      "loss": 0.5313,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 5.0571136474609375,
+      "learning_rate": 2e-05,
+      "loss": 0.1923,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 7.593774795532227,
+      "learning_rate": 2e-05,
+      "loss": 0.4613,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 11.923439979553223,
+      "learning_rate": 2e-05,
+      "loss": 0.7395,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.8395790457725525,
+      "learning_rate": 2e-05,
+      "loss": 0.0462,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 3.3043808937072754,
+      "learning_rate": 2e-05,
+      "loss": 0.2629,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 9.646453857421875,
+      "learning_rate": 2e-05,
+      "loss": 0.3714,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 4.387805461883545,
+      "learning_rate": 2e-05,
+      "loss": 0.2219,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 5203228526379008.0,
+      "train_loss": 0.31173837661743165,
+      "train_runtime": 189.427,
+      "train_samples_per_second": 1.056,
+      "train_steps_per_second": 0.264
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5203228526379008.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4954490d41567c600c766cb6260cf6575e0faa298da32365174a9be869b521
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e9244b1cfbf88b52befc18b724e7cd671e4eb407aabd8884118c9e9e5212dd
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b6c22f2e8e6010de5e540eed7cd6d9544224ba58ebc34ec3b66db8fccdc0a6d
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67f6953201fb2a4dde70231b9bc251dcbcc1777c95f3625e0d02a3b94a895caa
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d843fd5a0b30e5d2423185c7f7c1087228c7ae93460fd1db50dff217bfa5ea9
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e46d20265302b91f3f24e0d48d23aeb7fa6a7185fc4e7b7ad9e7e02243bed549
+size 360880622

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:975b40293671bbdeb5d32200b2bd64f0890718dc69b9e7162e2fb26a36afe6a1
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d853326567172e3fdbcb84b8d3f5e7c1f022f8b7bc03a0334be7fcdcf3bab33
+size 360880106

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/3_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 18.76683235168457,
+      "learning_rate": 2e-05,
+      "loss": 1.0907,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 12.490086555480957,
+      "learning_rate": 2e-05,
+      "loss": 0.7188,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 15.25229549407959,
+      "learning_rate": 2e-05,
+      "loss": 0.6253,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 22.94184684753418,
+      "learning_rate": 2e-05,
+      "loss": 1.2405,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 5.571078777313232,
+      "learning_rate": 2e-05,
+      "loss": 0.389,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 9.62364387512207,
+      "learning_rate": 2e-05,
+      "loss": 0.7317,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 7.308901309967041,
+      "learning_rate": 2e-05,
+      "loss": 0.5968,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 4.2408223152160645,
+      "learning_rate": 2e-05,
+      "loss": 0.5853,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 8.469420433044434,
+      "learning_rate": 2e-05,
+      "loss": 0.6742,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 4.550015926361084,
+      "learning_rate": 2e-05,
+      "loss": 0.4569,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 8.425732612609863,
+      "learning_rate": 2e-05,
+      "loss": 0.5865,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 10.20775318145752,
+      "learning_rate": 2e-05,
+      "loss": 0.4898,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 15.613037109375,
+      "learning_rate": 2e-05,
+      "loss": 0.8773,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 7.8877410888671875,
+      "learning_rate": 2e-05,
+      "loss": 0.8102,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 11.263969421386719,
+      "learning_rate": 2e-05,
+      "loss": 0.8425,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 10.401885986328125,
+      "learning_rate": 2e-05,
+      "loss": 0.7032,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 8.376611709594727,
+      "learning_rate": 2e-05,
+      "loss": 0.4214,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 3.5057554244995117,
+      "learning_rate": 2e-05,
+      "loss": 0.4259,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 14.423259735107422,
+      "learning_rate": 2e-05,
+      "loss": 0.474,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 5.601011753082275,
+      "learning_rate": 2e-05,
+      "loss": 0.4506,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 8.93952465057373,
+      "learning_rate": 2e-05,
+      "loss": 0.795,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 7.401601314544678,
+      "learning_rate": 2e-05,
+      "loss": 0.4657,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 6.547579765319824,
+      "learning_rate": 2e-05,
+      "loss": 0.524,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 4.996316909790039,
+      "learning_rate": 2e-05,
+      "loss": 0.8237,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 14.771405220031738,
+      "learning_rate": 2e-05,
+      "loss": 0.9451,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 2233493086011392.0,
+      "train_loss": 0.6697625303268433,
+      "train_runtime": 113.9082,
+      "train_samples_per_second": 1.756,
+      "train_steps_per_second": 0.439
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2233493086011392.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8f43e8703563b6486c90073e04b43b429fa3218145830d8a090d468ec437bd
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99730f58c467e3669a4d4713dd9b1451cff4b93a58ae46c46497eb8f731facfd
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6edd260c1245ae17fbbd8bdc254dc7d9c62b4151143c13392a97e053e4bc420f
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed393604bc7cd9f3ae87cd5c96a132a1a9ee3dba8c55b6a3a0aaa11c23731c2
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a06037709047bff4a95ef960ee58dbdfe9a84bcda95a019105db9e35084c5945
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round20.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21ba21d09b33136f5e652150ade2f8ff774b5b30eb0209d0e1b4eccfda6525f8
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5ace8e0b834acde0cd7f0b3c99ab0c668c3019d4f8c61d1e257934541a3bb25
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_client_model_round7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49d74bc1d4d6cd847082f58e5ab66e88b532413a1e07e096ea389507b3a50d0d
+size 778341034

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/4_trainer_state.json ADDED Viewed

	@@ -0,0 +1,217 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.42625540494918823,
+      "learning_rate": 2e-05,
+      "loss": 0.3228,
+      "step": 2
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.7395199537277222,
+      "learning_rate": 2e-05,
+      "loss": 0.0953,
+      "step": 4
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 4.357167720794678,
+      "learning_rate": 2e-05,
+      "loss": 0.342,
+      "step": 6
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 3.030724048614502,
+      "learning_rate": 2e-05,
+      "loss": 0.453,
+      "step": 8
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 4.075497150421143,
+      "learning_rate": 2e-05,
+      "loss": 0.2039,
+      "step": 10
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 3.159764528274536,
+      "learning_rate": 2e-05,
+      "loss": 0.3253,
+      "step": 12
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 4.061643123626709,
+      "learning_rate": 2e-05,
+      "loss": 0.2518,
+      "step": 14
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.8145078420639038,
+      "learning_rate": 2e-05,
+      "loss": 0.4027,
+      "step": 16
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 2.392146348953247,
+      "learning_rate": 2e-05,
+      "loss": 0.1145,
+      "step": 18
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 4.058164119720459,
+      "learning_rate": 2e-05,
+      "loss": 0.4689,
+      "step": 20
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 1.3680384159088135,
+      "learning_rate": 2e-05,
+      "loss": 0.2133,
+      "step": 22
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 8.989691734313965,
+      "learning_rate": 2e-05,
+      "loss": 0.486,
+      "step": 24
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 8.30005931854248,
+      "learning_rate": 2e-05,
+      "loss": 0.6934,
+      "step": 26
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 12.295815467834473,
+      "learning_rate": 2e-05,
+      "loss": 0.7844,
+      "step": 28
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 4.061346530914307,
+      "learning_rate": 2e-05,
+      "loss": 0.2592,
+      "step": 30
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 4.065572738647461,
+      "learning_rate": 2e-05,
+      "loss": 0.5118,
+      "step": 32
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.6354613304138184,
+      "learning_rate": 2e-05,
+      "loss": 0.472,
+      "step": 34
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 5.219471454620361,
+      "learning_rate": 2e-05,
+      "loss": 0.413,
+      "step": 36
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.679935932159424,
+      "learning_rate": 2e-05,
+      "loss": 0.3027,
+      "step": 38
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 3.4444127082824707,
+      "learning_rate": 2e-05,
+      "loss": 0.4347,
+      "step": 40
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 2.349905252456665,
+      "learning_rate": 2e-05,
+      "loss": 0.2921,
+      "step": 42
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 3.327099323272705,
+      "learning_rate": 2e-05,
+      "loss": 0.2939,
+      "step": 44
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 2.9319257736206055,
+      "learning_rate": 2e-05,
+      "loss": 0.2592,
+      "step": 46
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 2.7675695419311523,
+      "learning_rate": 2e-05,
+      "loss": 0.4071,
+      "step": 48
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 7.027470588684082,
+      "learning_rate": 2e-05,
+      "loss": 0.3092,
+      "step": 50
+    },
+    {
+      "epoch": 2.0,
+      "step": 50,
+      "total_flos": 6019810878029824.0,
+      "train_loss": 0.3644888877868652,
+      "train_runtime": 192.4302,
+      "train_samples_per_second": 1.039,
+      "train_steps_per_second": 0.26
+    }
+  ],
+  "logging_steps": 2,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6019810878029824.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c46f9b30ec1ddd0705e19e3a27e0ee5ae37bfc4c2d9a99b2d33be8dab34eab79
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0bd3170badc104b1340132038527d6226113ca6eebce39a3f6699be2b0d1e46
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f97139241266520411df7976e108c2040ec519ca76fb4cb87b88ad7696cf6c6
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round17.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67fe3a24e6bec05583d408402670efcf642e612602b2bff16363992dfcd4e812
+size 778341886

client_states_ditto_bs4_saveoptim_lr2e-5_5e-5_sc315_4tasks_5rounds_fixitr50_T0125_decay099_SEED2/5_client_model_round2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eda87c5048819b2eb77ff1681af2797f1098905aae19565a8d5df6a3a59e038c
+size 778341034