|
{ |
|
"measurement": { |
|
"model.layers.0": { |
|
"accuracy": 0.9718493807595223, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.1": { |
|
"accuracy": 0.9743924007634632, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.2": { |
|
"accuracy": 0.9789639118534978, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.3": { |
|
"accuracy": 0.9701395294832764, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.4": { |
|
"accuracy": 0.9654985481756739, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.5": { |
|
"accuracy": 0.9670190012693638, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.6": { |
|
"accuracy": 0.9665761806536466, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.7": { |
|
"accuracy": 0.9742341495730216, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.8": { |
|
"accuracy": 0.9681875999522163, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.9": { |
|
"accuracy": 0.9690405392611865, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.10": { |
|
"accuracy": 0.9752317072998267, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.11": { |
|
"accuracy": 0.9653666304366197, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.12": { |
|
"accuracy": 0.9655183832655894, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.13": { |
|
"accuracy": 0.9703059931489406, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.14": { |
|
"accuracy": 0.972356626050896, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.15": { |
|
"accuracy": 0.9726598827110138, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.16": { |
|
"accuracy": 0.9713281676522456, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.17": { |
|
"accuracy": 0.9732002995733637, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.18": { |
|
"accuracy": 0.9698466830159305, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.19": { |
|
"accuracy": 0.9728071658173576, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.20": { |
|
"accuracy": 0.9659418970040861, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.21": { |
|
"accuracy": 0.9602713624117314, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.22": { |
|
"accuracy": 0.9711931099518551, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.23": { |
|
"accuracy": 0.9665514267544495, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.24": { |
|
"accuracy": 0.9678931769958581, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.25": { |
|
"accuracy": 0.9665456048605847, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.26": { |
|
"accuracy": 0.9651254423079081, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.27": { |
|
"accuracy": 0.9715138816682156, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.28": { |
|
"accuracy": 0.9675782685808372, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.29": { |
|
"accuracy": 0.9714089153130772, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.30": { |
|
"accuracy": 0.9718951306713279, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.31": { |
|
"accuracy": 0.9652374170400435, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.32": { |
|
"accuracy": 0.9640791268902831, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.33": { |
|
"accuracy": 0.9718027484268532, |
|
"total_bits": 1424424960.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 32 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.34": { |
|
"accuracy": 0.9841037099831738, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.35": { |
|
"accuracy": 0.9970002071495401, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.36": { |
|
"accuracy": 0.9985264171846211, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.37": { |
|
"accuracy": 0.9979474495630711, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.38": { |
|
"accuracy": 0.9980866985861212, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
}, |
|
"model.layers.39": { |
|
"accuracy": 0.9991928795352578, |
|
"total_bits": 1347010560.0, |
|
"o_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"down_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"q_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"k_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"v_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"gate_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
}, |
|
"up_proj": { |
|
"group_size": { |
|
"4": 128 |
|
}, |
|
"bits": [ |
|
4 |
|
], |
|
"bits_prop": [ |
|
1 |
|
], |
|
"scale_bits": 4, |
|
"scale_groups:": 32 |
|
} |
|
} |
|
} |
|
} |