winglian committed on
Commit
fc98a8d
·
verified ·
1 Parent(s): 5b0050a

Add files using upload-large-folder tool

Browse files
axolotl/stratos.yaml ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: meta-llama/Llama-3.1-70B
2
+ # Automatically upload checkpoint and final model to HF
3
+ # hub_model_id: username/custom_model_name
4
+ #
5
+
6
+ plugins:
7
+ - axolotl.integrations.liger.LigerPlugin
8
+ - axolotl.integrations.spectrum.SpectrumPlugin
9
+ spectrum_top_fraction: 0.5
10
+ spectrum_model_name: meta-llama/Meta-Llama-3.1-70B
11
+ liger_rope: true
12
+ liger_rms_norm: true
13
+ liger_glu_activation: true
14
+ liger_fused_linear_cross_entropy: true
15
+
16
+ strict: false
17
+
18
+ chat_template: llama3
19
+ datasets:
20
+ - path: bespokelabs/Bespoke-Stratos-17k
21
+ field_messages: conversations
22
+ message_property_mappings:
23
+ content: value
24
+ role: from
25
+ split: train
26
+ type: chat_template
27
+ dataset_prepared_path: last_run_prepared
28
+ val_set_size: 0.0
29
+ output_dir: ./outputs/out/reasoning-70b-stratos
30
+ save_safetensors: false
31
+
32
+ wandb_project: reasoning-70b-stratos
33
+ wandb_entity: axolotl-ai
34
+ wandb_watch:
35
+ wandb_name:
36
+ wandb_log_model:
37
+
38
+ sequence_len: 16384
39
+ sample_packing: true
40
+ pad_to_sequence_len: true
41
+
42
+ gradient_accumulation_steps: 1
43
+ micro_batch_size: 4
44
+ num_epochs: 3
45
+ optimizer: adamw_torch_fused
46
+ lr_scheduler: cosine
47
+ learning_rate: 2.0e-7
48
+ max_grad_norm: 1.0
49
+
50
+ train_on_inputs: false
51
+ group_by_length: false
52
+ bf16: true
53
+ tf32: true
54
+
55
+ gradient_checkpointing: unsloth
56
+ gradient_checkpointing_kwargs:
57
+ use_reentrant: true
58
+ logging_steps: 1
59
+ flash_attention: true
60
+
61
+ warmup_steps: 20
62
+ evals_per_epoch: 4
63
+ saves_per_epoch: 2
64
+ weight_decay: 0.01
65
+ deepspeed: deepspeed_configs/zero3_bf16_cpuoffload_params.json
66
+ _fsdp_final_state_dict_type: SHARDED_STATE_DICT
67
+ _fsdp:
68
+ - full_shard
69
+ - auto_wrap
70
+ _fsdp_config:
71
+ fsdp_limit_all_gathers: true
72
+ fsdp_sync_module_states: true
73
+ fsdp_offload_params: true
74
+ fsdp_use_orig_params: true
75
+ fsdp_cpu_ram_efficient_loading: true
76
+ fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
77
+ fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
78
+ fsdp_state_dict_type: SHARDED_STATE_DICT
79
+ fsdp_sharding_strategy: FULL_SHARD
80
+ fsdp_backward_prefetch: BACKWARD_PRE
81
+ special_tokens:
82
+ pad_token: <|finetune_right_pad_id|>
83
+ tokenizer_overrides:
84
+ 128011: <think>
85
+ 128012: </think>
86
+ 128013: <|begin_of_thought|>
87
+ 128014: <|end_of_thought|>
88
+ 128015: <|begin_of_solution|>
89
+ 128016: <|end_of_solution|>
90
+ fix_untrained_tokens:
91
+ - 128011
92
+ - 128012
93
+ - 128013
94
+ - 128014
95
+ - 128015
96
+ - 128016
pytorch_model-00001-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a34bb8e73d1d08f124f360b28b5bef065a8a79516a428cbd85fc1fd92e0b79b
3
  size 4584412848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d4ce00d8bb306781bb02cac54596f2bb117caffc1c4a8499b754da27942e322
3
  size 4584412848
pytorch_model-00002-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d22b04e3414cfdb99f010e709c81c52c3a7f522874ffacbcebd44874754167ee
3
  size 4664173598
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58268213279d186c83eaaeafd2d6156452f67f2e81f07806e86d3c8371983a36
3
  size 4664173598
pytorch_model-00003-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bc0201ef7515c9f34b77eb6dbb2e51c06087a0fc9d591ac38cbf371c2823630
3
  size 4999717966
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5823709c581bc73f2ed7c277075776c62ef3f02a62ac07da4112441bde267b4d
3
  size 4999717966
pytorch_model-00004-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa3ddd3c67e19c4287bc647f1010e600e1f1ea482c47c0bbe30310ab57f122fb
3
  size 4966162946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f2ed9040355585ab25205753ff8e352f1bcf598a36c13580ff6b5970840aeba
3
  size 4966162946
pytorch_model-00005-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d080590d027b804caca23832c6c17cb4e90b8b863c8e7c00d79c06dc6222e56
3
  size 4664140254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ab6cb09544c9ca687192b443a7956b9640c96fd2d472d69977533838604d34c
3
  size 4664140254
pytorch_model-00006-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01fd9c3bd64aba88ec3659c345f339f3af3426e27d658e142099aac78c83cd6a
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e9865b69355e32f505896c1ab6ebcfbdc4eb3e5aa21faeffadd3c82c7333c68
3
  size 4664173662
pytorch_model-00007-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7a2a0ccd20d38700f4a16d1d2586b871aac1b8b344644f7f3eb481f189070b8
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e79cf0374832368364044031ef0b7ca6f5390d815f780e84148e5b7325b000
3
  size 4664173662
pytorch_model-00008-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19160d9b4339c7282d7997e5a921353df50dd4ca04aa65529d672f419918484c
3
  size 4999718030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:147b8e1df7e393d21fb1c08fdb3f0162920adf16d93275e67149c59634d517f8
3
  size 4999718030
pytorch_model-00009-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c95b1edc1b38cee0bb05377a1e34f5f3972f94bbf4484fb206ad115c552f0640
3
  size 4966162946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2fe3c3ea6f166b27472528fd0f23c0957d62eb179deb937481c05c66f10a5d
3
  size 4966162946
pytorch_model-00010-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84212ebf16cf8087e728cd8bf8b0f56350d3ba7b34b0417d059a574d35e15fe2
3
  size 4664140254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2dfcbecd52ee8d0bb2c9238252b971a858e6304a8126fbd7bacba919056f3c
3
  size 4664140254
pytorch_model-00011-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66f395bdb92fe7314aa8044d6dbecee4eb807223c89861835eb5939811fc2374
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97a1edc0610cb5fab9f9fa24aa744d06f7b281dccd7590c8bd37e22efa35f016
3
  size 4664173662
pytorch_model-00012-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66c619e10dfc6d59f666a243d88ffae038f91b05242f809c1fc425b16e584053
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae24e4510d469141f8d748d5c541533a9b17a4cd7ee08eb918016d089f0d4f03
3
  size 4664173662
pytorch_model-00013-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93c456cb948fa394facff864c94668b61ec00e2370a2b65f12b7447ed96416d0
3
  size 4999718030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781f2ddbae89c4d05011a340d766c4670a1206bf6a2fc7169bb1b2ef39046847
3
  size 4999718030
pytorch_model-00014-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0aee64fcd4fa13d6dbf79c21d89636e02d45f5e74190dfc96c381440a8834820
3
  size 4966162946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9827ebf41fd1359c7f7609daa40c5461ce3ec0fd19bac843da3ba85e5381a61b
3
  size 4966162946
pytorch_model-00015-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb4461e9a45598a3eed0e44a87ebb13a22bc8f73a1850b0b86053d0b3255bc6f
3
  size 4664140254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd2271f51875384bb3d8cde5ea3bf27c0e52a51e9ebe5b2d290aa4955f434cf
3
  size 4664140254
pytorch_model-00016-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d052b56df8bf0956a9389dc080330f1d061db1aca86a4bd5c9c601ed4b90e9c9
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eef98ffe86c17c6c4a90a0c6ebbff92bce225362e82b445c7f6a363d644c069
3
  size 4664173662
pytorch_model-00017-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f1d2add4215890ff1d7455d8d6f4033da9c925991acbb11aa2d5e2c507e027a
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974f0d3db11c6401483d50f1e6c8050f1d3acd7dcda9cfec4260967a4359b8e5
3
  size 4664173662
pytorch_model-00018-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94d0406b4fe2903a4f3eba0470b5135b0179644226c19c1b4e5df9f3b5996fbc
3
  size 4999718030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06122e03b71e51e011b6f2210e3062a92e6ffbdcf92121626f70bcb0f99c8c78
3
  size 4999718030
pytorch_model-00019-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37e0b5fdb37bd49f64958158eb1a60c7274164dce9fab34a57e68566e40d392
3
  size 4966162946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37bd0c033fc617f0097be1578246c62c009d2ce080d5e805bfbcb365db10effc
3
  size 4966162946
pytorch_model-00020-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eba67c4ddf931f26e2981ba679b535e7c4978540c959fbd147a2311cb2ab5fa7
3
  size 4664140254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d317cc99f592ecf8fe27fe254bfee1d5e0a2c18aad73e355a36d4b47ab2dee
3
  size 4664140254
pytorch_model-00021-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:578a0d4a2c1fb29bfe7f39b056fd4113be02d8ceff893ce3c7ad7059857b2679
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e79f61e9b6bffa60aaad7257185e0efc6a398d6c17a1412c73ae57bedacb18
3
  size 4664173662
pytorch_model-00022-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adf5b26aa5ab8a076359b71f62042e49dcfe7e2f75c2d931dc277cbfc499145c
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1382db9ce7ef50aa5d56d1f9044b7723f5f11bccc14b5df2d423b456a69d22
3
  size 4664173662
pytorch_model-00023-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:924cdd5fd2fc3dd0779401ccc321ed5005d4b0e5fa1fe8da95b719cde88934b8
3
  size 4999718030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ae155dce3c5ba469dfbb66e0e7d401c02c770764d84dfc0b71425a696d6d85
3
  size 4999718030
pytorch_model-00024-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28cbf45d9cb9d72bdcefe9e6b8443d4ddad63bbaaff1df12505463e98297c838
3
  size 4966162946
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d930d2ed5480d2591c393e4720e6c31b239a257de8327f37c9fbd06e89543bce
3
  size 4966162946
pytorch_model-00025-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ca3ff2a2a1000246617e6c68d4f19e0423b48cf086649b1fc7c4fa7bc7f798a
3
  size 4664140254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f99b981a28754eb925037ada3d861a5e40b5aa8f54cab6a399a46e3717b7dee
3
  size 4664140254
pytorch_model-00026-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bba302c2ef4191a6fd7bf0ce1294de32f87fdba1e83c67f05f55bfa48e24bc32
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb3a660becaf02035b0b743c13ff4e007603cb13ab79cffdb9de0ae2638e1c7
3
  size 4664173662
pytorch_model-00027-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f6d1607885aba5d9899d43f630650e1ff173a66d70e97eb999335663b8e3c61
3
  size 4664173662
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef941b1bf189b1374e6e0ea6c8aa3714c40e254e7bf4d1a69f60b1e4d03f1577
3
  size 4664173662
pytorch_model-00028-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df579b5de1ba22d60b1b6372976d0139c45874676e8160d50ea1af772b002dc9
3
  size 4999718030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081f5f99c4b68635f27fde96653eb3d3a42779fb9e0e9ce9dd431589e4244b0c
3
  size 4999718030
pytorch_model-00029-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a2d1fbfaf8d37de754c07a4536fb9b2f3bcceae6f852b0b78ede8c0912702d5
3
  size 4966179604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5919f7eb3191f2ffb91d4caca995aa8d143999ac7c1641fcf9be009264a92c
3
  size 4966179604
pytorch_model-00030-of-00030.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720c87013f072d30e1d163413b6bf71297f0bb3525cfe5f5241523c46d20ee9f
3
  size 2101347717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff41e9bc29be5429590e6e88840707d0c481f66f0094edd49ec6110e7fcfe10f
3
  size 2101347717