Add files using upload-large-folder tool
Browse files- axolotl/stratos.yaml +96 -0
- pytorch_model-00001-of-00030.bin +1 -1
- pytorch_model-00002-of-00030.bin +1 -1
- pytorch_model-00003-of-00030.bin +1 -1
- pytorch_model-00004-of-00030.bin +1 -1
- pytorch_model-00005-of-00030.bin +1 -1
- pytorch_model-00006-of-00030.bin +1 -1
- pytorch_model-00007-of-00030.bin +1 -1
- pytorch_model-00008-of-00030.bin +1 -1
- pytorch_model-00009-of-00030.bin +1 -1
- pytorch_model-00010-of-00030.bin +1 -1
- pytorch_model-00011-of-00030.bin +1 -1
- pytorch_model-00012-of-00030.bin +1 -1
- pytorch_model-00013-of-00030.bin +1 -1
- pytorch_model-00014-of-00030.bin +1 -1
- pytorch_model-00015-of-00030.bin +1 -1
- pytorch_model-00016-of-00030.bin +1 -1
- pytorch_model-00017-of-00030.bin +1 -1
- pytorch_model-00018-of-00030.bin +1 -1
- pytorch_model-00019-of-00030.bin +1 -1
- pytorch_model-00020-of-00030.bin +1 -1
- pytorch_model-00021-of-00030.bin +1 -1
- pytorch_model-00022-of-00030.bin +1 -1
- pytorch_model-00023-of-00030.bin +1 -1
- pytorch_model-00024-of-00030.bin +1 -1
- pytorch_model-00025-of-00030.bin +1 -1
- pytorch_model-00026-of-00030.bin +1 -1
- pytorch_model-00027-of-00030.bin +1 -1
- pytorch_model-00028-of-00030.bin +1 -1
- pytorch_model-00029-of-00030.bin +1 -1
- pytorch_model-00030-of-00030.bin +1 -1
axolotl/stratos.yaml
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
base_model: meta-llama/Llama-3.1-70B
|
2 |
+
# Automatically upload checkpoint and final model to HF
|
3 |
+
# hub_model_id: username/custom_model_name
|
4 |
+
#
|
5 |
+
|
6 |
+
plugins:
|
7 |
+
- axolotl.integrations.liger.LigerPlugin
|
8 |
+
- axolotl.integrations.spectrum.SpectrumPlugin
|
9 |
+
spectrum_top_fraction: 0.5
|
10 |
+
spectrum_model_name: meta-llama/Meta-Llama-3.1-70B
|
11 |
+
liger_rope: true
|
12 |
+
liger_rms_norm: true
|
13 |
+
liger_glu_activation: true
|
14 |
+
liger_fused_linear_cross_entropy: true
|
15 |
+
|
16 |
+
strict: false
|
17 |
+
|
18 |
+
chat_template: llama3
|
19 |
+
datasets:
|
20 |
+
- path: bespokelabs/Bespoke-Stratos-17k
|
21 |
+
field_messages: conversations
|
22 |
+
message_property_mappings:
|
23 |
+
content: value
|
24 |
+
role: from
|
25 |
+
split: train
|
26 |
+
type: chat_template
|
27 |
+
dataset_prepared_path: last_run_prepared
|
28 |
+
val_set_size: 0.0
|
29 |
+
output_dir: ./outputs/out/reasoning-70b-stratos
|
30 |
+
save_safetensors: false
|
31 |
+
|
32 |
+
wandb_project: reasoning-70b-stratos
|
33 |
+
wandb_entity: axolotl-ai
|
34 |
+
wandb_watch:
|
35 |
+
wandb_name:
|
36 |
+
wandb_log_model:
|
37 |
+
|
38 |
+
sequence_len: 16384
|
39 |
+
sample_packing: true
|
40 |
+
pad_to_sequence_len: true
|
41 |
+
|
42 |
+
gradient_accumulation_steps: 1
|
43 |
+
micro_batch_size: 4
|
44 |
+
num_epochs: 3
|
45 |
+
optimizer: adamw_torch_fused
|
46 |
+
lr_scheduler: cosine
|
47 |
+
learning_rate: 2.0e-7
|
48 |
+
max_grad_norm: 1.0
|
49 |
+
|
50 |
+
train_on_inputs: false
|
51 |
+
group_by_length: false
|
52 |
+
bf16: true
|
53 |
+
tf32: true
|
54 |
+
|
55 |
+
gradient_checkpointing: unsloth
|
56 |
+
gradient_checkpointing_kwargs:
|
57 |
+
use_reentrant: true
|
58 |
+
logging_steps: 1
|
59 |
+
flash_attention: true
|
60 |
+
|
61 |
+
warmup_steps: 20
|
62 |
+
evals_per_epoch: 4
|
63 |
+
saves_per_epoch: 2
|
64 |
+
weight_decay: 0.01
|
65 |
+
deepspeed: deepspeed_configs/zero3_bf16_cpuoffload_params.json
|
66 |
+
_fsdp_final_state_dict_type: SHARDED_STATE_DICT
|
67 |
+
_fsdp:
|
68 |
+
- full_shard
|
69 |
+
- auto_wrap
|
70 |
+
_fsdp_config:
|
71 |
+
fsdp_limit_all_gathers: true
|
72 |
+
fsdp_sync_module_states: true
|
73 |
+
fsdp_offload_params: true
|
74 |
+
fsdp_use_orig_params: true
|
75 |
+
fsdp_cpu_ram_efficient_loading: true
|
76 |
+
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
|
77 |
+
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
78 |
+
fsdp_state_dict_type: SHARDED_STATE_DICT
|
79 |
+
fsdp_sharding_strategy: FULL_SHARD
|
80 |
+
fsdp_backward_prefetch: BACKWARD_PRE
|
81 |
+
special_tokens:
|
82 |
+
pad_token: <|finetune_right_pad_id|>
|
83 |
+
tokenizer_overrides:
|
84 |
+
128011: <think>
|
85 |
+
128012: </think>
|
86 |
+
128013: <|begin_of_thought|>
|
87 |
+
128014: <|end_of_thought|>
|
88 |
+
128015: <|begin_of_solution|>
|
89 |
+
128016: <|end_of_solution|>
|
90 |
+
fix_untrained_tokens:
|
91 |
+
- 128011
|
92 |
+
- 128012
|
93 |
+
- 128013
|
94 |
+
- 128014
|
95 |
+
- 128015
|
96 |
+
- 128016
|
pytorch_model-00001-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4584412848
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d4ce00d8bb306781bb02cac54596f2bb117caffc1c4a8499b754da27942e322
|
3 |
size 4584412848
|
pytorch_model-00002-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173598
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58268213279d186c83eaaeafd2d6156452f67f2e81f07806e86d3c8371983a36
|
3 |
size 4664173598
|
pytorch_model-00003-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999717966
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5823709c581bc73f2ed7c277075776c62ef3f02a62ac07da4112441bde267b4d
|
3 |
size 4999717966
|
pytorch_model-00004-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966162946
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f2ed9040355585ab25205753ff8e352f1bcf598a36c13580ff6b5970840aeba
|
3 |
size 4966162946
|
pytorch_model-00005-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664140254
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ab6cb09544c9ca687192b443a7956b9640c96fd2d472d69977533838604d34c
|
3 |
size 4664140254
|
pytorch_model-00006-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e9865b69355e32f505896c1ab6ebcfbdc4eb3e5aa21faeffadd3c82c7333c68
|
3 |
size 4664173662
|
pytorch_model-00007-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60e79cf0374832368364044031ef0b7ca6f5390d815f780e84148e5b7325b000
|
3 |
size 4664173662
|
pytorch_model-00008-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999718030
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:147b8e1df7e393d21fb1c08fdb3f0162920adf16d93275e67149c59634d517f8
|
3 |
size 4999718030
|
pytorch_model-00009-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966162946
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef2fe3c3ea6f166b27472528fd0f23c0957d62eb179deb937481c05c66f10a5d
|
3 |
size 4966162946
|
pytorch_model-00010-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664140254
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d2dfcbecd52ee8d0bb2c9238252b971a858e6304a8126fbd7bacba919056f3c
|
3 |
size 4664140254
|
pytorch_model-00011-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97a1edc0610cb5fab9f9fa24aa744d06f7b281dccd7590c8bd37e22efa35f016
|
3 |
size 4664173662
|
pytorch_model-00012-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae24e4510d469141f8d748d5c541533a9b17a4cd7ee08eb918016d089f0d4f03
|
3 |
size 4664173662
|
pytorch_model-00013-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999718030
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:781f2ddbae89c4d05011a340d766c4670a1206bf6a2fc7169bb1b2ef39046847
|
3 |
size 4999718030
|
pytorch_model-00014-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966162946
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9827ebf41fd1359c7f7609daa40c5461ce3ec0fd19bac843da3ba85e5381a61b
|
3 |
size 4966162946
|
pytorch_model-00015-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664140254
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bd2271f51875384bb3d8cde5ea3bf27c0e52a51e9ebe5b2d290aa4955f434cf
|
3 |
size 4664140254
|
pytorch_model-00016-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eef98ffe86c17c6c4a90a0c6ebbff92bce225362e82b445c7f6a363d644c069
|
3 |
size 4664173662
|
pytorch_model-00017-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:974f0d3db11c6401483d50f1e6c8050f1d3acd7dcda9cfec4260967a4359b8e5
|
3 |
size 4664173662
|
pytorch_model-00018-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999718030
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06122e03b71e51e011b6f2210e3062a92e6ffbdcf92121626f70bcb0f99c8c78
|
3 |
size 4999718030
|
pytorch_model-00019-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966162946
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37bd0c033fc617f0097be1578246c62c009d2ce080d5e805bfbcb365db10effc
|
3 |
size 4966162946
|
pytorch_model-00020-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664140254
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70d317cc99f592ecf8fe27fe254bfee1d5e0a2c18aad73e355a36d4b47ab2dee
|
3 |
size 4664140254
|
pytorch_model-00021-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87e79f61e9b6bffa60aaad7257185e0efc6a398d6c17a1412c73ae57bedacb18
|
3 |
size 4664173662
|
pytorch_model-00022-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d1382db9ce7ef50aa5d56d1f9044b7723f5f11bccc14b5df2d423b456a69d22
|
3 |
size 4664173662
|
pytorch_model-00023-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999718030
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01ae155dce3c5ba469dfbb66e0e7d401c02c770764d84dfc0b71425a696d6d85
|
3 |
size 4999718030
|
pytorch_model-00024-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966162946
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d930d2ed5480d2591c393e4720e6c31b239a257de8327f37c9fbd06e89543bce
|
3 |
size 4966162946
|
pytorch_model-00025-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664140254
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f99b981a28754eb925037ada3d861a5e40b5aa8f54cab6a399a46e3717b7dee
|
3 |
size 4664140254
|
pytorch_model-00026-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfb3a660becaf02035b0b743c13ff4e007603cb13ab79cffdb9de0ae2638e1c7
|
3 |
size 4664173662
|
pytorch_model-00027-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4664173662
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef941b1bf189b1374e6e0ea6c8aa3714c40e254e7bf4d1a69f60b1e4d03f1577
|
3 |
size 4664173662
|
pytorch_model-00028-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999718030
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:081f5f99c4b68635f27fde96653eb3d3a42779fb9e0e9ce9dd431589e4244b0c
|
3 |
size 4999718030
|
pytorch_model-00029-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4966179604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5919f7eb3191f2ffb91d4caca995aa8d143999ac7c1641fcf9be009264a92c
|
3 |
size 4966179604
|
pytorch_model-00030-of-00030.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2101347717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff41e9bc29be5429590e6e88840707d0c481f66f0094edd49ec6110e7fcfe10f
|
3 |
size 2101347717
|