Upload OLMo-2 model checkpoint
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +129 -0
- .metadata.json +1 -0
- config.json +1 -0
- data_paths.txt +2 -0
- model_and_optim/.metadata +3 -0
- model_and_optim/__0_0.distcp +3 -0
- model_and_optim/__0_1.distcp +3 -0
- model_and_optim/__0_10.distcp +3 -0
- model_and_optim/__0_11.distcp +3 -0
- model_and_optim/__0_12.distcp +3 -0
- model_and_optim/__0_13.distcp +3 -0
- model_and_optim/__0_14.distcp +3 -0
- model_and_optim/__0_15.distcp +3 -0
- model_and_optim/__0_2.distcp +3 -0
- model_and_optim/__0_3.distcp +3 -0
- model_and_optim/__0_4.distcp +3 -0
- model_and_optim/__0_5.distcp +3 -0
- model_and_optim/__0_6.distcp +3 -0
- model_and_optim/__0_7.distcp +3 -0
- model_and_optim/__0_8.distcp +3 -0
- model_and_optim/__0_9.distcp +3 -0
- model_and_optim/__1_0.distcp +3 -0
- model_and_optim/__1_1.distcp +3 -0
- model_and_optim/__1_10.distcp +3 -0
- model_and_optim/__1_11.distcp +3 -0
- model_and_optim/__1_12.distcp +3 -0
- model_and_optim/__1_13.distcp +3 -0
- model_and_optim/__1_14.distcp +3 -0
- model_and_optim/__1_15.distcp +3 -0
- model_and_optim/__1_2.distcp +3 -0
- model_and_optim/__1_3.distcp +3 -0
- model_and_optim/__1_4.distcp +3 -0
- model_and_optim/__1_5.distcp +3 -0
- model_and_optim/__1_6.distcp +3 -0
- model_and_optim/__1_7.distcp +3 -0
- model_and_optim/__1_8.distcp +3 -0
- model_and_optim/__1_9.distcp +3 -0
- model_and_optim/__2_0.distcp +3 -0
- model_and_optim/__2_1.distcp +3 -0
- model_and_optim/__2_10.distcp +3 -0
- model_and_optim/__2_11.distcp +3 -0
- model_and_optim/__2_12.distcp +3 -0
- model_and_optim/__2_13.distcp +3 -0
- model_and_optim/__2_14.distcp +3 -0
- model_and_optim/__2_15.distcp +3 -0
- model_and_optim/__2_2.distcp +3 -0
- model_and_optim/__2_3.distcp +3 -0
- model_and_optim/__2_4.distcp +3 -0
- model_and_optim/__2_5.distcp +3 -0
- model_and_optim/__2_6.distcp +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,132 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
model_and_optim/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
model_and_optim/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
model_and_optim/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
model_and_optim/__0_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
model_and_optim/__0_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
model_and_optim/__0_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
model_and_optim/__0_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
model_and_optim/__0_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
model_and_optim/__0_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
model_and_optim/__0_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
model_and_optim/__0_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
model_and_optim/__0_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
model_and_optim/__0_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
model_and_optim/__0_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
model_and_optim/__0_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
model_and_optim/__0_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
model_and_optim/__0_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
model_and_optim/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
model_and_optim/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
model_and_optim/__1_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
model_and_optim/__1_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
model_and_optim/__1_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
model_and_optim/__1_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
model_and_optim/__1_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
model_and_optim/__1_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
model_and_optim/__1_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
model_and_optim/__1_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
model_and_optim/__1_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
model_and_optim/__1_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
model_and_optim/__1_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
model_and_optim/__1_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
model_and_optim/__1_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
model_and_optim/__1_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
model_and_optim/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
model_and_optim/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
model_and_optim/__2_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
model_and_optim/__2_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
model_and_optim/__2_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
model_and_optim/__2_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
model_and_optim/__2_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
model_and_optim/__2_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
model_and_optim/__2_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
model_and_optim/__2_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
model_and_optim/__2_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
model_and_optim/__2_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
model_and_optim/__2_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
model_and_optim/__2_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
model_and_optim/__2_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
model_and_optim/__2_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
model_and_optim/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
model_and_optim/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
model_and_optim/__3_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
model_and_optim/__3_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
model_and_optim/__3_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
model_and_optim/__3_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
model_and_optim/__3_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
model_and_optim/__3_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
model_and_optim/__3_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
model_and_optim/__3_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
model_and_optim/__3_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
model_and_optim/__3_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
model_and_optim/__3_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
model_and_optim/__3_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
model_and_optim/__3_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
model_and_optim/__3_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
model_and_optim/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
model_and_optim/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
model_and_optim/__4_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
model_and_optim/__4_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
model_and_optim/__4_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
model_and_optim/__4_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
model_and_optim/__4_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
model_and_optim/__4_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
model_and_optim/__4_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
model_and_optim/__4_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
model_and_optim/__4_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
model_and_optim/__4_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
model_and_optim/__4_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
model_and_optim/__4_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
model_and_optim/__4_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
model_and_optim/__4_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
model_and_optim/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
model_and_optim/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
model_and_optim/__5_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
model_and_optim/__5_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
model_and_optim/__5_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
model_and_optim/__5_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
model_and_optim/__5_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
model_and_optim/__5_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
model_and_optim/__5_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
model_and_optim/__5_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
model_and_optim/__5_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
model_and_optim/__5_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
model_and_optim/__5_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
model_and_optim/__5_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
model_and_optim/__5_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
model_and_optim/__5_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
model_and_optim/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
model_and_optim/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
model_and_optim/__6_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
model_and_optim/__6_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
model_and_optim/__6_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
model_and_optim/__6_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
model_and_optim/__6_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
model_and_optim/__6_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
model_and_optim/__6_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
model_and_optim/__6_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
model_and_optim/__6_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
model_and_optim/__6_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
model_and_optim/__6_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
model_and_optim/__6_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
model_and_optim/__6_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
model_and_optim/__6_9.distcp filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
model_and_optim/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
model_and_optim/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
model_and_optim/__7_10.distcp filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
model_and_optim/__7_11.distcp filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
model_and_optim/__7_12.distcp filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
model_and_optim/__7_13.distcp filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
model_and_optim/__7_14.distcp filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
model_and_optim/__7_15.distcp filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
model_and_optim/__7_2.distcp filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
model_and_optim/__7_3.distcp filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
model_and_optim/__7_4.distcp filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
model_and_optim/__7_5.distcp filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
model_and_optim/__7_6.distcp filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
model_and_optim/__7_7.distcp filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
model_and_optim/__7_8.distcp filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
model_and_optim/__7_9.distcp filter=lfs diff=lfs merge=lfs -text
|
.metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"version": "2.0.0"}
|
config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"run_name": "caia_olmo2_1b_d3", "launch": {"name": "caia_olmo2_1b_d3-train-95a8ba94", "cmd": ["src/scripts/train/OLMoE-2x1B-anneal.py", "train", "caia_olmo2_1b_d3", "ai2/jupiter-cirrascale-2", "--launch.num_nodes=1", "--launch.workspace=OLMo-modular", "--launch.priority=urgent", "--launch.beaker_image=petew/olmo-core-tch260cu124", "--trainer.callbacks.wandb.enabled=True", "--trainer.callbacks.comet.enabled=False", "--trainer.max_duration.value=200_000_000", "--trainer.max_duration.unit=tokens", "--dataset.mix_base_dir=/weka/oe-training-default/ai2-llm/preprocessed", "--dataset.mix=caia", "--trainer.load_path=/weka/oe-training-default/ai2-llm/checkpoints/swj0419/model/merge/olmo2_1b/moe2_random", "--train_module.dp_config.num_replicas=4", "--train_module.scheduler.warmup_steps=2000", "--train_module.optim.lr=4e-5", "--model.block.feed_forward_moe.router.top_k=2", "--train_module.rank_microbatch_size=4096", "--train_module.ep_config.degree=2"], "budget": "ai2/oe-base", "task_name": "train", "workspace": "OLMo-modular", "setup_steps": ["conda install gh --channel conda-forge", "gh repo clone \"$REPO_URL\" .", "git checkout \"$GIT_REF\"", "git submodule update --init --recursive", "conda shell.bash activate base", "pip install -e '.[dev,beaker,wandb,train]'", "pip freeze", "mkdir -p ~/.aws", "printenv AWS_CONFIG > ~/.aws/config", "printenv AWS_CREDENTIALS > ~/.aws/credentials"], "beaker_image": "petew/olmo-core-tch260cu124", "num_nodes": 1, "num_gpus": 8, "shared_memory": "10GiB", "clusters": ["ai2/jupiter-cirrascale-2"], "shared_filesystem": true, "priority": "urgent", "preemptible": true, "env_vars": [{"name": "NCCL_DEBUG", "value": "WARN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}, {"name": "CUDA_LAUNCH_BLOCKING", "value": "0", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvVar"}], "env_secrets": [{"name": "GITHUB_TOKEN", "secret": "weijias_GITHUB_TOKEN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "BEAKER_TOKEN", "secret": "weijias_BEAKER_TOKEN", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "WANDB_API_KEY", "secret": "weijias_WANDB_API_KEY", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "COMET_API_KEY", "secret": "weijias_COMET_API_KEY", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "AWS_CONFIG", "secret": "weijias_AWS_CONFIG", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "AWS_CREDENTIALS", "secret": "weijias_AWS_CREDENTIALS", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "R2_ENDPOINT_URL", "secret": "R2_ENDPOINT_URL", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "WEKA_ENDPOINT_URL", "secret": "WEKA_ENDPOINT_URL", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}, {"name": "SLACK_WEBHOOK_URL", "secret": "SLACK_WEBHOOK_URL", "_CLASS_": "olmo_core.launch.beaker.BeakerEnvSecret"}], "nfs": false, "weka_buckets": [{"bucket": "oe-training-default", "mount": "/weka/oe-training-default", "_CLASS_": "olmo_core.launch.beaker.BeakerWekaBucket"}], "allow_dirty": false, "_CLASS_": "olmo_core.launch.beaker.BeakerLaunchConfig"}, "model": {"d_model": 2048, "vocab_size": 100352, "n_layers": 16, "block": {"attention": {"name": "default", "n_heads": 16, "bias": false, "rope": {"name": "default", "theta": 500000, "full_precision": true, "_CLASS_": "olmo_core.nn.rope.RoPEConfig"}, "qk_norm": {"name": "rms", "eps": 1e-06, "bias": false, "dtype": "float32", "_CLASS_": "olmo_core.nn.layer_norm.LayerNormConfig"}, "use_flash": false, "dtype": "float32", "_CLASS_": "olmo_core.nn.attention.AttentionConfig"}, "layer_norm": {"name": "rms", "eps": 1e-06, "bias": false, "dtype": "float32", "_CLASS_": "olmo_core.nn.layer_norm.LayerNormConfig"}, "feed_forward_moe": {"name": "default", "num_experts": 2, "hidden_size": 8192, "capacity_factor": 1.2, "router": {"name": "default", "top_k": 2, "uniform_expert_assignment": false, "_CLASS_": "olmo_core.nn.moe.router.MoERouterConfig"}, "lb_loss_weight": 0.0, "z_loss_weight": 0.001, "dtype": "float32", "_CLASS_": "olmo_core.nn.moe.moe.MoEConfig"}, "name": "moe_reordered_norm", "_CLASS_": "olmo_core.nn.transformer.config.TransformerBlockConfig"}, "lm_head": {"name": "default", "layer_norm": {"name": "rms", "eps": 1e-06, "bias": false, "dtype": "float32", "_CLASS_": "olmo_core.nn.layer_norm.LayerNormConfig"}, "bias": false, "dtype": "float32", "loss_implementation": "default", "_CLASS_": "olmo_core.nn.lm_head.LMHeadConfig"}, "name": "moe", "dtype": "float32", "init_method": "normal", "init_seed": 0, "freeze_params": ["embeddings.*", "blocks.*.attention*", "blocks.*.feed_forward_norm.*", "lm_head.*"], "_CLASS_": "olmo_core.nn.transformer.config.TransformerConfig"}, "dataset": {"tokenizer": {"vocab_size": 100278, "eos_token_id": 100257, "pad_token_id": 100277, "identifier": "allenai/dolma2-tokenizer", "_CLASS_": "olmo_core.data.tokenizer.TokenizerConfig"}, "name": "fsl", "sequence_length": 4096, "max_target_sequence_length": 8192, "mix": "caia", "mix_base_dir": "/weka/oe-training-default/ai2-llm", "include_instance_metadata": true, "generate_doc_lengths": false, "expand_glob": false, "work_dir": "/weka/oe-training-default/ai2-llm/checkpoints/weijias/caia_olmo2_1b_d3/dataset-cache", "_CLASS_": "olmo_core.data.numpy_dataset.NumpyDatasetConfig"}, "data_loader": {"global_batch_size": 4194304, "seed": 34521, "num_workers": 4, "_CLASS_": "olmo_core.data.data_loader.NumpyDataLoaderConfig"}, "train_module": {"rank_microbatch_size": 4096, "max_sequence_length": 4096, "optim": {"compile": false, "fixed_fields": ["initial_lr"], "lr": 4e-05, "betas": [0.9, 0.95], "eps": 1e-08, "weight_decay": 0.0, "fused": true, "_CLASS_": "olmo_core.optim.adamw.AdamWConfig"}, "max_grad_norm": 1.0, "scheduler": {"lr_field": "lr", "initial_lr_field": "initial_lr", "warmup_steps": 2000, "alpha_f": 0.1, "warmup_min_lr": 0.0, "_CLASS_": "olmo_core.optim.scheduler.CosWithWarmup"}, "compile_model": true, "dp_config": {"name": "hsdp", "param_dtype": "bfloat16", "reduce_dtype": "float32", "num_replicas": 4, "wrapping_strategy": "fine_grained", "prefetch_factor": 0, "_CLASS_": "olmo_core.train.train_module.transformer.TransformerDataParallelConfig"}, "ep_config": {"degree": 2, "_CLASS_": "olmo_core.train.train_module.transformer.TransformerExpertParallelConfig"}, "z_loss_multiplier": 1e-05, "label_ignore_index": -100, "_CLASS_": "olmo_modular.train.train_module.transfomer.FreezeTransformerTrainModuleConfig"}, "trainer": {"save_folder": "/weka/oe-training-default/ai2-llm/checkpoints/weijias/caia_olmo2_1b_d3", "load_path": "/weka/oe-training-default/ai2-llm/checkpoints/swj0419/model/merge/olmo2_1b/moe2_random", "load_strategy": "if_available", "checkpointer": {"pre_download": false, "throttle_uploads": false, "_CLASS_": "olmo_core.train.checkpoint.CheckpointerConfig"}, "save_overwrite": true, "max_duration": {"value": 200000000, "unit": "tokens", "_CLASS_": "olmo_core.train.common.Duration"}, "cancel_check_interval": 1, "metrics_collect_interval": 10, "callbacks": {"downstream_evaluator": {"tasks": ["piqa"], "tokenizer": {"vocab_size": 100278, "eos_token_id": 100257, "pad_token_id": 100277, "identifier": "allenai/dolma2-tokenizer", "_CLASS_": "olmo_core.data.tokenizer.TokenizerConfig"}, "eval_interval": 500, "eval_duration": {"value": 1, "unit": "epochs", "_CLASS_": "olmo_core.train.common.Duration"}, "log_interval": 5, "enabled": true, "_CLASS_": "olmo_modular.eval.evaluator_callback.DownstreamEvaluatorUpdatedCallbackConfig"}, "checkpointer": {"save_interval": 10000, "ephemeral_save_interval": 250, "save_async": true, "remove": "ephemeral_only", "enabled": true, "_CLASS_": "olmo_core.train.callbacks.checkpointer.CheckpointerCallback"}, "comet": {"enabled": false, "name": "caia_olmo2_1b_d3", "project": "OLMo-modular", "workspace": "ai2", "cancel_tags": ["cancel", "canceled", "cancelled"], "cancel_check_interval": 10, "notifications": "none", "failure_tag": "failed", "_CLASS_": "olmo_core.train.callbacks.comet.CometCallback"}, "wandb": {"enabled": true, "name": "caia_olmo2_1b_d3", "project": "OLMo-modular", "entity": "ai2-llm", "cancel_tags": ["cancel", "canceled", "cancelled"], "cancel_check_interval": 10, "_CLASS_": "olmo_core.train.callbacks.wandb.WandBCallback"}, "config_saver": {"fname": "config.json", "_CLASS_": "olmo_core.train.callbacks.config_saver.ConfigSaverCallback"}, "profiler": {"skip_first": 0, "wait": 1, "warmup": 5, "active": 3, "repeat": 1, "enabled": false, "_CLASS_": "olmo_core.train.callbacks.profiler.ProfilerCallback"}, "garbage_collector": {"gc_interval": 1000, "enabled": true, "_CLASS_": "olmo_core.train.callbacks.garbage_collector.GarbageCollectorCallback"}, "slack_notifier": {"name": "caia_olmo2_1b_d3", "notifications": "end_only", "enabled": false, "_CLASS_": "olmo_core.train.callbacks.slack_notifier.SlackNotifierCallback"}, "beaker": {"enabled": true, "_CLASS_": "olmo_core.train.callbacks.beaker.BeakerCallback"}, "gpu_monitor": {"_CLASS_": "olmo_core.train.callbacks.gpu_memory_monitor.GPUMemoryMonitorCallback"}, "lm_evaluator": {"eval_dataset": {"tokenizer": {"vocab_size": 100278, "eos_token_id": 100257, "pad_token_id": 100277, "identifier": "allenai/dolma2-tokenizer", "_CLASS_": "olmo_core.data.tokenizer.TokenizerConfig"}, "name": "padded_fsl", "sequence_length": 4096, "mix": "v3-small-ppl-validation", "mix_base_dir": "/weka/oe-training-default/ai2-llm", "include_instance_metadata": true, "generate_doc_lengths": false, "expand_glob": false, "work_dir": "/weka/oe-training-default/ai2-llm/checkpoints/weijias/dataset-cache", "_CLASS_": "olmo_core.data.numpy_dataset.NumpyDatasetConfig"}, "eval_interval": 1000, "eval_duration": {"value": 1, "unit": "epochs", "_CLASS_": "olmo_core.train.common.Duration"}, "log_interval": 5, "enabled": true, "_CLASS_": "olmo_core.train.callbacks.evaluator_callback.LMEvaluatorCallbackConfig"}}, "no_checkpoints": false, "no_evals": false, "_CLASS_": "olmo_core.train.config.TrainerConfig"}, "init_seed": 12536, "_CLASS_": "olmo_modular.internal.freeze_experiment.ExperimentConfig"}
|
data_paths.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/weka/oe-training-default/ai2-llm/caia_olmo2/combine_tokenized/part-0-00000.npy
|
| 2 |
+
/weka/oe-training-default/ai2-llm/caia_olmo2/combine_tokenized/part-1-00000.npy
|
model_and_optim/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c25f5639792ad24246cb78b1ba2920fa504ea59100266013bddaeb02692e5869
|
| 3 |
+
size 616662
|
model_and_optim/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e61e7bb41da152f4f948b5f82b5f92041ee4435e242c7eed7a3996a6b253c417
|
| 3 |
+
size 545314056
|
model_and_optim/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a75c7c4aa1231c20ede8227af30b38a6fe86d2d3103f729520935745ad5da9a1
|
| 3 |
+
size 545314056
|
model_and_optim/__0_10.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af457da84aafb48dc8b4a798a1893334b446276549c31807d8f0dea7f78b31ca
|
| 3 |
+
size 184666668
|
model_and_optim/__0_11.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3946ea81f7c32e03f2c8597b052ac370a3b32005032a71fbee38a86accedb767
|
| 3 |
+
size 184666668
|
model_and_optim/__0_12.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0febd277fa5a17bcb0fa905d5857e7791b1cb73adb483371e2bbb2cd311957a6
|
| 3 |
+
size 184666668
|
model_and_optim/__0_13.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec449b3c76a0b6292152d7d3d450a9be541240bfbbac3e55bc599ffe47d44f8b
|
| 3 |
+
size 184666668
|
model_and_optim/__0_14.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fdcd263934ff2e9860dce95862a41df865989b914a3f05a449f2d9b3c774b81
|
| 3 |
+
size 235006576
|
model_and_optim/__0_15.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a84ae79e526b03fffe0b28886e07f38e1be925dd487a6ba03179461a00567da4
|
| 3 |
+
size 235006576
|
model_and_optim/__0_2.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a50244048048de753ab58f829e8b366ba5d8a8cbc0d535dbc430851c95d97f6
|
| 3 |
+
size 184660144
|
model_and_optim/__0_3.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38a3e7ae6e4015d4f157bdbe2babe2fa9c296a2e9a5970919994235734c23b5b
|
| 3 |
+
size 184660144
|
model_and_optim/__0_4.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7caa231a1072b75e5e34c631e1e160e7a77b8c7660c17b87edfeb5edb86d0ed
|
| 3 |
+
size 184660144
|
model_and_optim/__0_5.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20598cf5e65d5cfe49aa39708d0245a856e41ac18cfb2a1c95b655ffd3a78c52
|
| 3 |
+
size 184660144
|
model_and_optim/__0_6.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0a5e29880353df214007ef10ffd4146ac4a6f84117feaf22d169f7508dd80f5
|
| 3 |
+
size 184660144
|
model_and_optim/__0_7.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0acf275433ddda4c1dc73a8fbf3cb19b6a2c5e40093d27efad768532f3c8ddf3
|
| 3 |
+
size 184660144
|
model_and_optim/__0_8.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:246dcfaf8a4ac8c8209a04b7fb625157fc26cd9f89d2791b63841687e92a0151
|
| 3 |
+
size 184661324
|
model_and_optim/__0_9.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73ab491220f462d7171250d3ff9532ec073e7512941f66927ce4f18ee6df8330
|
| 3 |
+
size 184667848
|
model_and_optim/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:727a8e4eb5e0548618ba92c313fb5b16a35ebb20477516b5cbd8e7d7e7d7de10
|
| 3 |
+
size 545276040
|
model_and_optim/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1d910b74fba7d81ddd887b4ef43e430a69f236e7d9a40415b07484668ce5d39
|
| 3 |
+
size 545276040
|
model_and_optim/__1_10.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb0f518e97d119a9c0bde3c96bcfef8d807e27830bcd3402c5454f4f5523c858
|
| 3 |
+
size 184618032
|
model_and_optim/__1_11.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc928c2e4ca56cdbcf80ac36da987fa719ae5da64760fb31c3ff135b8241b44b
|
| 3 |
+
size 184618032
|
model_and_optim/__1_12.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73f1f684e4e1c957c70ecfa7411ec25de96d85f2ac9d4a7945f696a8c4775c2e
|
| 3 |
+
size 184618032
|
model_and_optim/__1_13.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2244bc3d52fec82f8229ff9f45cd29c42c97eab40d94ae59a6041da518afa310
|
| 3 |
+
size 184618032
|
model_and_optim/__1_14.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3bc1fab016d42eb40630d23e3e3ba3b5b4948410f46445ec22294287c7f07e3
|
| 3 |
+
size 234957940
|
model_and_optim/__1_15.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d2e18d5bd5832c507789e8771ffff5b91e181efecd5c855c22a7868d96bad97
|
| 3 |
+
size 234957940
|
model_and_optim/__1_2.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34e289e5dca974468e39067b6fc97e698ba9441cc500537d0fa9b77b01bfec86
|
| 3 |
+
size 184622128
|
model_and_optim/__1_3.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f26a272b661a76c8837848843031cd81bcfd17494dadf86d7ba5de5382c7ece
|
| 3 |
+
size 184622128
|
model_and_optim/__1_4.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:019559b5433399fd8108a958951bdd315492e3b956e5e4b2023bc05eb8bcb9d8
|
| 3 |
+
size 184622128
|
model_and_optim/__1_5.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:683a1a58a844675429ff1de10aaaa75a2c79b7e2c53198329d172aa5309a6cf2
|
| 3 |
+
size 184622128
|
model_and_optim/__1_6.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09b6e0f15b468a7acfb5c0a06720d53997dfce700c1b91d0488d1cc43885a503
|
| 3 |
+
size 184622128
|
model_and_optim/__1_7.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cf048ff61144716923c36c9a69d7abcbea3733c6e1cb500f9e89238d8f974ec
|
| 3 |
+
size 184622128
|
model_and_optim/__1_8.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e3bb846150594185392d790dfbdeb61d40011a304818aa9e7a610f933b3877b
|
| 3 |
+
size 184623308
|
model_and_optim/__1_9.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:595cdfc5038ec6b9754ebca4f704559680391ab60a409779a1d176504f4330f4
|
| 3 |
+
size 184618032
|
model_and_optim/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7328b4eb7fdc6050c7b37ab651d85f735d4315aa1b5f2a9c35ae7db9edb88f38
|
| 3 |
+
size 151005564
|
model_and_optim/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e3f749f7b34918f8156c0c503cf23a71b7502bfc1d89d351d99a696948a0dc1
|
| 3 |
+
size 151005564
|
model_and_optim/__2_10.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d564b0fb22af7caf17399fe6a9a64f2ac19efdf2655af10f593c6dc066189b71
|
| 3 |
+
size 151005564
|
model_and_optim/__2_11.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d6b2ad31fd84ac2905aa293c33467f57ae7c531d6eb1de751c9e146d75d0b04
|
| 3 |
+
size 151005564
|
model_and_optim/__2_12.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:395aa7f87518b3fe6cf251917cc0001f0e4d83948f370ba7ec4198ccfff04d1f
|
| 3 |
+
size 151005564
|
model_and_optim/__2_13.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a740ef150903500ca40e37e57521dd1fad4b76b0601f51bda4ce4562e1c3dee2
|
| 3 |
+
size 151005564
|
model_and_optim/__2_14.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32444cc5580db282db3ca17eceaa46cff65a943d31396ebeaafbf47b7fa9c308
|
| 3 |
+
size 151005564
|
model_and_optim/__2_15.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1706d91f254a427d5e635aa9749eceb50ff41db3f27c4109e0e5cc48bb6386b6
|
| 3 |
+
size 151005564
|
model_and_optim/__2_2.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7250b55553443793b734bdcf443da683be2b8e42c00c9fe407e45c507c82f642
|
| 3 |
+
size 151005564
|
model_and_optim/__2_3.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b44ad99fa2f99a8a6838c085c19b5bef7a3b723cd80c35ee764b09db7bb9ff17
|
| 3 |
+
size 151005564
|
model_and_optim/__2_4.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0614ca129c14258e6d73ecd31faf26fb4b6ecf8e4d1fbd0eed63a0c8f5e31962
|
| 3 |
+
size 151005564
|
model_and_optim/__2_5.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:406971458d744bfe10bed6068409a36c87242c2e30c64552dee52098739a7bd6
|
| 3 |
+
size 151005564
|
model_and_optim/__2_6.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90d551a440c35b04a61c20f4aac8983b567f86215ed7e05905dabfc35c74f599
|
| 3 |
+
size 151005564
|