# Ling-mini-2.0-AWQ-4bit / recipe.yaml
# Uploaded by cpatonn using huggingface_hub (commit 4e1bc29, verified)
quant_stage:
  quant_modifiers:
    # AWQ (Activation-aware Weight Quantization) applied via llm-compressor.
    AWQModifier:
      config_groups:
        group_0:
          # Quantize all Linear modules in this group.
          targets: [Linear]
          weights:
            num_bits: 4            # 4-bit integer weights
            type: int
            symmetric: true
            group_size: 32         # one scale per group of 32 weights
            strategy: group
            block_structure: null
            dynamic: false         # scales fixed at calibration time
            actorder: null
            observer: mse          # observer used to pick quantization ranges
            observer_kwargs: {}
          # Activations are left unquantized (weight-only scheme).
          input_activations: null
          output_activations: null
          format: null
      targets: [Linear]
      # Modules excluded from quantization: embeddings, the first decoder
      # layer, all layernorms, attention output projections, shared experts,
      # MoE router gates, the final norm, and the LM head.
      ignore:
        - word_embeddings
        - 're:model.layers.0[.].*'
        - 're:.*input_layernorm'
        - 're:.*post_attention_layernorm'
        - 're:.*layernorm.*'
        - 're:.*attention[.]dense'
        - 're:.*shared_experts.*'
        - 're:.*mlp[.]gate.*'
        - model.norm
        - lm_head
      # AWQ smoothing mappings: each smooth_layer's scale is balanced against
      # the listed downstream layers before quantizing their weights.
      # Regexes are single-quoted so '\d' and '$' stay literal for the
      # downstream regex engine.
      mappings:
        - smooth_layer: 're:.*input_layernorm'
          balance_layers: ['re:.*query_key_value']
        - smooth_layer: 're:.*post_attention_layernorm'
          balance_layers: ['re:.*gate_proj', 're:.*up_proj']
        - smooth_layer: 're:.*mlp[.]experts[.]\d+[.]up_proj$'
          balance_layers: ['re:.*mlp[.]experts[.]\d+[.]down_proj$']
      duo_scaling: true