quant_stage:
  quant_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            observer: mse
            observer_kwargs: {}
          input_activations: null
          output_activations: null
          format: null
      targets: [Linear]
      ignore: [word_embeddings, 're:model.layers.0[.].*', 're:.*input_layernorm',
        're:.*post_attention_layernorm', 're:.*layernorm.*', 're:.*attention[.]dense',
        're:.*shared_experts.*', 're:.*mlp[.]gate.*', model.norm, lm_head]
      mappings:
      - smooth_layer: re:.*input_layernorm
        balance_layers: ['re:.*query_key_value']
      - smooth_layer: re:.*post_attention_layernorm
        balance_layers: ['re:.*gate_proj', 're:.*up_proj']
      - smooth_layer: re:.*mlp[.]experts[.]\d+[.]up_proj$
        balance_layers: ['re:.*mlp[.]experts[.]\d+[.]down_proj$']
      duo_scaling: true
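The `re:` prefix in the `ignore` and `mappings` entries marks a value as a regular expression over module names rather than an exact name. A minimal illustrative sketch of how such patterns select modules, using `re.match` against hypothetical module names (the matching helper inside llm-compressor may differ in detail):

```python
import re

# Patterns copied from the recipe above; the "re:" prefix is stripped
# and the remainder is treated as a Python regular expression.
patterns = [
    "re:.*mlp[.]experts[.]\\d+[.]up_proj$",
    "re:.*mlp[.]gate.*",
]

# Hypothetical module names, for illustration only.
modules = [
    "model.layers.3.mlp.experts.17.up_proj",
    "model.layers.3.mlp.gate",
    "model.layers.3.self_attention.dense",
]

for name in modules:
    hits = [p for p in patterns if re.match(p[len("re:"):], name)]
    print(f"{name} -> {hits or 'no match'}")
```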
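To apply the recipe, save it to disk and pass it to llm-compressor's one-shot entry point. A minimal sketch, assuming the recipe is saved as `recipe.yaml`; the model ID, calibration dataset, sample count, and output directory are placeholders, not values taken from the recipe:

```python
from llmcompressor import oneshot
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "path/to/base-model"  # hypothetical: substitute the model being quantized

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# AWQ is calibration-based: the modifier observes sample activations to
# compute the smoothing/balancing scales declared under `mappings`.
oneshot(
    model=model,
    recipe="recipe.yaml",
    dataset="open_platypus",  # any calibration dataset supported by llm-compressor
    max_seq_length=2048,
    num_calibration_samples=256,
)

model.save_pretrained("model-awq-w4g32", save_compressed=True)
tokenizer.save_pretrained("model-awq-w4g32")
```

Because the scales are derived from observed activations, the choice of calibration dataset and `num_calibration_samples` directly affects the quality of the quantized weights.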