
Found 4 bunk layers; removing 3 of them without any retraining yields a perfectly usable model with lower requirements and more speed.
4c2ac8f
verified
# slices:
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [0, 28]
#     # [0, 28] is good because layer 28 does nothing when removed
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [29, 32]
# slices:
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [0, 27]
#     # [0, 27] is good because layer 27 does nothing when removed
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [28, 32]
# slices:
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [0, 26]
#     # [0, 26] is good because layer 26 does nothing when removed
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [27, 32]
# slices:
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [0, 25]
#     # [0, 25] is good because layer 25 does nothing when removed
#   - sources:
#       - model: TroyDoesAI/Mermaid-Llama-3-8B
#         layer_range: [26, 32]
# mergekit passthrough config: reassembles TroyDoesAI/Mermaid-Llama-3-8B from
# three slices of its own layers, leaving out the layers between the slices.
# NOTE(review): assuming layer_range is half-open [start, end), these slices
# keep layers 0-24, 26 and 29-31 and drop layers 25, 27 and 28. The notes below
# also list layer 26 as doing nothing — confirm that keeping it is intended.
slices:
  - sources:
      - model: TroyDoesAI/Mermaid-Llama-3-8B
        layer_range: [0, 25]
  - sources:
      - model: TroyDoesAI/Mermaid-Llama-3-8B
        layer_range: [26, 27]
  - sources:
      - model: TroyDoesAI/Mermaid-Llama-3-8B
        layer_range: [29, 32]
# Notes on individual layers:
# Layer 31 / 32 is a syntax layer: around the middle of the output, extra ">" for edges.
# Layer 29 is a syntax layer: around the middle of the input, semicolon and colon mix-up.
# TODO: Layer 28 does NOTHING
# Layer 27 does NOTHING
# Layer 26 does NOTHING
# Layer 25 does NOTHING
# passthrough copies the selected layers as-is (no weight averaging between models).
merge_method: passthrough
dtype: float16