diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,93655 @@ +{ + "last_module_idx": 98, + "measurement": { + "lm_head.linear": null, + "model.layers.0.mlp": [ + { + "accuracy": 0.9095431315271478, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9104038414202238, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9397065074820268, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559445475277147, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.952213632433038, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956330396627125, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705135979150471, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727190576101604, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762446707800815, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975619513737528, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789005941466281, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987524972934472, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894403415290933, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928167430978072, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938260579579755, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973148664361552, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978699335142186, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.0.self_attn": [ + { + "accuracy": 0.8208000910909552, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8382653813613088, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.8513023727818539, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9124351363433034, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9153880508322465, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9159334145094219, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.948160027202807, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9485482667621813, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9522968562025773, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556810448044225, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9569539239532069, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600823521614075, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624500839333785, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655666100351434, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791620342355025, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827107614592502, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986356888946734, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895002065520537, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996433572549569, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9779001819460016, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830560637147803, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828760247481497, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829742218318739, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922951914762196, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932637951875988, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932447020944796, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943084873651203, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948467431884063, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961391632494173, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967542765171904, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979446691117788, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982534950893176, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982551859789773, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987520964718178, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987521306856683, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990571162810451, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.9839907806170615, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869467733721984, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882964829080984, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929478811590295, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930316829367688, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931960599987131, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948193321102544, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950314403364533, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995475033004033, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956503561452815, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965575898164197, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968178217348299, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968382049547998, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971217113105875, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982341484803903, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985254715735975, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986180985640538, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991671688070423, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995647993821063, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9792325653527912, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799017498367711, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831970208569577, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842431121750882, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894250905827472, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903486653378135, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917629792502052, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944373992712874, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949691836771212, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946016673194734, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953084662556648, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972479984556374, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976588452332898, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984925636335423, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985990095883608, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989018275549537, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995946902781725, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9908933443458456, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913652798062876, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921750669416628, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940439595987922, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953630786193045, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954563147927585, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997155448323802, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972667978390267, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976002140656898, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977227316090935, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977158364888868, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979083292970532, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998021055993281, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982113748004562, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989035374632007, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991094213175146, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992533701128865, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995267894119024, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999791790167556, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.9798009050519842, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804112158323589, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836185511789823, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846025501426897, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897871025298771, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906396999170906, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919944719264382, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946864666907411, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951674322548666, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948010530910993, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954699579822389, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973547929211667, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977452147163843, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985641073435545, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986544799451765, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989286329793302, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999620612392104, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9887626445607135, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896380579785297, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905819022341779, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932316722054231, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944502205440873, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946017551578974, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963890161169203, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965684715854494, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968497674716147, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997052576392889, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972473506472612, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974731293163801, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976137162823426, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978491234544077, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986809720530322, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989143410991681, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990730303290644, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993874248313276, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997452961182908, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.977730216164338, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783697583173451, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818249862445029, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828868884789316, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886917236604189, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896193609425896, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911125223887595, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941517859697342, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946820488885829, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942438437750465, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949770989386659, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970763262949491, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975013964270291, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984281173858204, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985096889891123, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988090141039145, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995823384899842, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9890514949434682, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897562077170924, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907081346762808, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933212297527414, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945017703269657, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946166692595733, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966685250401497, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967993509612585, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970881993833342, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972797681234384, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972756367764974, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974909944361762, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976928702702648, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978924728929996, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987335446241655, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989393980879533, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991550526924824, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994231401697585, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997708428356993, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.9760630318993017, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767758014955019, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980530172586441, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816632396296451, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878462239315635, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.988883984716315, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904735692237553, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937015783629919, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942808221829565, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993788874855167, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946014626245749, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968295101272432, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973075489856695, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982900156786567, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983779818407799, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987033966340517, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995479457276432, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9885504363398803, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892982730739995, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903108250153693, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930161425941869, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942412823438644, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943498357346183, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964336431340167, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965651549006763, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968246621520895, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970244477061849, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971291271871642, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973322029568648, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975348689446324, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977432044320985, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986320381101809, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988654738194064, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990668201721028, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993624754348084, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997482318186054, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9754659157050284, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976201615835491, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801879101677945, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813726936516008, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875684231519699, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886315606142345, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902944094256351, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935554552235102, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941532737330386, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936469189430538, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944825599852362, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996760572061727, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972491872153784, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998253139618196, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983458048418948, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986911917987623, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995382047307334, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9880163238236779, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886634812543267, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896225380270105, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923705388056604, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939203168216505, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940354988763207, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962478315359667, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963968407950903, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967621657017031, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996913731294243, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970157338600409, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972390585431927, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974299032045039, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976384967173401, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985856201107565, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988113763300996, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990590777444212, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993509278681717, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997428843604499, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9755651480273196, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763480377824683, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803259686419838, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815022004278082, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876607013376135, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887420958594272, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903806612679833, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993594128442438, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941797005502802, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936854686391982, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994524164811561, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967825459806543, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972694782834304, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982563786600765, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983508735895157, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986903119440141, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995296539150571, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.9866306844510531, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987229060185583, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884994508404481, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913577868750221, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933470471909172, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934529132748905, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961682573745125, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963113049927511, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965673194904077, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967787054024244, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966953663449538, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969473267464262, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972402702428793, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99743698321675, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984897038850346, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987284528385652, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999009791173433, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993091371204508, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997353895910477, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9763095802382419, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770432977299941, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809529734285254, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821183210925052, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879726320505142, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989042812272122, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906774437741229, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937392363422796, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943325672494737, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938451577174036, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946713337772771, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968571894262966, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973416681352415, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982947262102052, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998393810599258, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987353025690505, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995432019135669, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9876672351046613, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884282797574997, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894900486657494, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923209697008133, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938649864573228, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939646469919305, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963211801491285, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964464918563241, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967191338931259, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969679685799699, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969597872542707, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971908076029075, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974059754688489, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976246990263462, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985642993920728, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988171381777838, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990586532573951, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993590305216218, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999744350850386, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9770351914983046, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777439268011796, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814577165402865, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825700568525415, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883209474776921, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893500075528496, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909209656087976, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939241962213266, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944950310807479, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940204153719702, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948021910692516, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969445937558225, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974126298176614, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983446151018143, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984367675098934, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987645998205009, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995582970839582, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9873641909737336, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879688008835441, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887865977851968, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991313174366951, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934952525716079, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993621537167775, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960491037682483, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996197081710163, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965950936863297, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969023536694678, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996838324948361, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997162898512263, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973375624731967, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975570067763329, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985172545635387, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987679766000885, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990162669043792, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993276708808384, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997311281530481, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9772436744288394, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779552550692308, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816842330129523, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827878459503776, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884151871267118, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894538623721976, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910185493920979, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939562086212007, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945353089194549, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940657494099516, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948663139029553, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969651361829356, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997436534417303, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983510735787844, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984463700338414, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987714448453564, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995598482752317, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9859226330330497, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867471487898576, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878722146937722, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905749716256794, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930144509202555, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931647032499313, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956955415637869, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958375602176315, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960933837451433, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964982322172115, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996591029590682, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968344137856835, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970623160663404, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972848888290556, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983844803156037, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986462159768531, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989053082505339, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992418820528608, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996987976820061, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9771186772145724, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778477659350947, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816209407229173, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982754299515172, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883155540416115, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893858675893984, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909671725411164, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938973811896223, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944796005361959, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940053220642241, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994824821619611, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969321920683509, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974142912971345, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983323879147831, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984293787887222, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987643684603666, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995556999193994, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9877102273075204, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883125377328772, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891810323062696, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915931648329684, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934880835445303, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935718394423786, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995834835657948, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959401445169198, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965345071334588, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966417970625978, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968538307829907, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970244584899199, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972030281236297, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975332047202086, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984964497975612, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987425429648474, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998987317281334, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992869156167695, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997173434760618, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9593021085387782, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638193406556782, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.964268960450825, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675280389032865, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.983001111369384, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846741023816561, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847190207556674, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871207742314589, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887178697084126, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913978090411738, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926601005227942, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955580638427484, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962557856189577, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964157562506827, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997867946758082, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980489590058201, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985739120135182, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9646595245913455, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9691134001079359, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707192872699938, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830528309470729, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832723862246463, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835037871410972, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990082489816766, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990351967905697, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910119881755427, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915316348013125, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915860005115208, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921388845694693, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926045748748278, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932144339147367, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959094540068978, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965791733641374, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973811699371589, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981523773779994, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992696187998119, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9774949472201498, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782662579887792, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981975655806692, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830924021570306, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885064324266032, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895654226604261, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911310413950368, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939867536488333, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945548521844965, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941073252182258, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949087449594548, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969851576576108, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974549718593296, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998350811435988, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984549331037622, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987899002275968, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995549861831885, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9861915958555121, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869744228689294, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879942120690095, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908188275600734, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993121779278705, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932269250091753, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957491880968997, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995850701865397, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961592970710051, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996333868880021, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965401809466513, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967796528025678, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970182162758551, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972925968468189, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983757294126248, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986350996125686, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989004878229216, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992127360678033, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997000713391524, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9782689982338956, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979009465167397, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826452732086182, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837367911087839, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889015218144969, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899110229391801, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914414780704599, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941768752116906, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947302886529973, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943035939022115, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950685450120976, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970831914167655, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975330776682025, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998406127977528, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985068700227299, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988346750798979, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995748673712737, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9848119820419111, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985868973167319, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864720843340221, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888379001303723, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922166239274176, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923569912973204, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956581443548203, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957016467263824, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962015967620047, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965127424189919, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964144971025618, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967373697773406, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969913143860666, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972661065035745, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983985185819236, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986281179283795, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989582625658888, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992691784803021, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997162733864235, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9727192521095276, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753608719298714, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779694754826395, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789859103529077, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878051798594626, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893601842616734, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909675419330597, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938352704048157, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942817256638878, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940022269361898, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947771288846669, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969382386066412, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997343974482072, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982998290736425, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984152531741481, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987696514121795, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994727339791624, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9859124171106439, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868846841548619, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987875402757996, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908912621046367, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929152123237911, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930509211201417, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957313168989984, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958789238804265, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962786620384768, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964324957446048, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964771804056669, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967303607416781, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969542367677939, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972067832162506, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982849994772359, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998595164402535, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988720080766239, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992327932268381, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996993428488311, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9791801775756636, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798919495783354, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831745938250893, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841702172630712, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893047652746502, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903602011893925, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917297151527906, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943545750881496, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949221799248144, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944818345339674, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952710663017473, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997161092334672, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976280190442738, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984314396585289, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99854518365311, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988334718111315, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995755223361286, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9853754247489729, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860670943009225, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871692633942554, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896840421777022, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925733930186221, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992711111118919, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957012690995869, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959533681210718, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963071330597526, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965250209758156, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996354761876558, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966237180327114, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996889985509609, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971437209138745, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983386928705793, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986061509698629, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988934683956598, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992404720304828, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999701244486986, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9798662866416731, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805047543425309, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836916421589098, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.984649448018325, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897153095195168, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906430197389502, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920012347008053, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946209372658479, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951227715140895, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947248379650869, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954287770547365, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972987676921644, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977140828574959, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985255098068401, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986178175987381, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989058034200418, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996030936997972, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9862298996824967, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873072018748835, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98848993605689, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911212066286489, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932281327875037, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933062384003087, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956424863714921, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957211864622015, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961491787904188, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996426087068884, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966600309861334, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968941699909536, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970909825673229, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973242976947835, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984014305825296, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986745437705203, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988974455351892, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992368950165416, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996908468831527, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9790809468219155, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797422509444388, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830149004333898, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840201610013058, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892828856643877, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990254111980137, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916700618831735, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943903817942268, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949125471083742, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945026110661658, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952370912620896, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971840430639292, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976173825562, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984623938798904, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985583086351031, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988601119502595, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995844245055004, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.9871192268635097, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987629370469796, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885610091058832, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910575025960019, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932238177249306, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934188770620447, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957010514642063, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995934889504784, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962049201130867, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964204908985841, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966204985976219, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968892500588769, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970056579301232, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973157022736574, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983109592607147, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986144784641894, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988363067570486, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992023263322679, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996717810434731, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9782165147756275, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789493491775111, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824068467868002, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834947554688704, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887561445173464, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898078222023813, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913126214554435, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940523579716682, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946332993476015, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942052721192962, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950007466893447, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970077817377291, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974998231781157, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99831499355404, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984626283771113, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987783667288328, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994984259338755, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.986996118175356, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874811478351292, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881562669026224, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904945226092088, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934124711312746, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935280586543837, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957889251803097, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995920474592008, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962843295774961, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962992213274303, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967758459480185, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969722056075146, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971222926519419, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973106329378328, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983976863716778, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986577876108257, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989038500934839, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999250862355295, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996962183841357, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.9761734510722914, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770008466745678, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980735110609155, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819537354143042, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875275853433108, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889625585392902, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906299004429265, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934807494282722, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941410089009687, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937310018821767, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945942461490631, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967701411560962, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972953765015853, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981780446281558, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983349703252316, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986868473260027, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994372227474263, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9844592150888944, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852966227029499, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866640787375601, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896803286514784, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991772030529223, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919021960936094, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945440327650622, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946743481253323, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953217392689303, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995557935222199, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959518085969122, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962666807206053, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965255448692724, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967176533843342, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980969748606807, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983747913257072, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986365573774827, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991057659254262, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996263435306517, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9749278975160498, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9758838462202173, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798783189372012, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981221300990958, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871161831052679, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883124514629966, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900681313715483, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930621997306222, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937557440839315, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933953308745435, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994295787654425, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966207401532876, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971475885494759, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980937854239815, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982864931225777, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986709922944245, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994805451286467, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9831734393772326, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841097420767734, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856031442943373, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887051496066546, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914747704016534, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916736687484541, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945370311799803, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947728377423788, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951804433214037, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995330101173175, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957542125331728, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960640527700123, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963149530322928, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966802565675033, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979883412781515, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998337587831836, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99855497881378, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990803581711493, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996103815440285, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.97347534016559, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744316025784141, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786604093877893, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800807457221182, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863636415255698, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876047278705397, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894761372553674, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927113487532264, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934236936663327, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930123914229242, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939574873761127, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964288822130153, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969782992021033, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980097328753847, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981857283334983, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985914604836389, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994594111156306, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9820546492149955, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9828409929024545, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842858408626757, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879454993887952, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902165830135345, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904549357138182, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937701374292374, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940232501218194, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944909089489987, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948094079368993, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949697318829989, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953881882523236, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956024979290209, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959095089059127, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974884986877441, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979574056832414, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998094539697233, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998942185585436, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994939448998162, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9435053059929296, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9467060377723292, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.950687311197582, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.956754882084696, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721413913526034, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746950331487154, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778670543118527, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813463719267594, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836956952747545, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857435477407355, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987764091083878, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992664889285439, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938413163549021, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994926957707656, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964993705874995, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974870054345382, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982813753579792, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.949076837614963, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9547781567824514, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9571666999867088, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.974617031059767, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975929845320551, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976174417294954, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857322981483058, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859998100682309, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869606047868729, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877350793073052, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878755878461035, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886758319641414, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892763644456863, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901219457387924, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941115053860765, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950512051582336, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962698966264725, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973164751733604, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989966184489036, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9720919634166517, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730570630023354, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.977530753926227, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790277888900355, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856976803980375, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869674099119086, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889586920800962, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923537326486487, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930947328868666, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926872833778984, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936604566479984, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996269262934986, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968345545624432, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979212017435777, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981078585903895, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985318305461031, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994339694416052, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9807009963612807, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816501187650781, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835421113591445, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987512386158893, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901080649150046, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903112085242021, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938942962571194, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941112148134332, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944353817324889, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947053625395423, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949003054123176, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952807751925368, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956323226031504, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960323779990798, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997582797157137, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979640035644958, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982800101371188, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989251994380826, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995255377633792, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.9696144869453028, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706310943553322, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.975539168244914, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772028640696877, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844172126368472, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858016403097856, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879954770991677, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916353806069023, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924497525942954, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992038410745169, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930948701344038, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959366733306333, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965507458699377, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977217895027838, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979467197860542, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984089211609802, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993756558059862, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.980680663334696, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812014369588149, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834164710421311, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868414841200176, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904304452632603, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990589376342924, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941020251104706, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943404429053005, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946115758073958, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949775263667107, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952469036767357, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954985548791132, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958924109998503, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963323681762344, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978212145598311, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981593073982942, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984924762852883, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990188473541486, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995935801906806, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.mlp": [ + { + "accuracy": 0.9667436982456007, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678161771673905, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732081231318022, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750420388422514, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829321974202206, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844070923955817, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868385807463997, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990895443056759, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917834667783034, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912692433909366, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992420516516033, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955432328738665, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962150678038597, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975276616843123, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977427009297045, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982527543447519, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99932664490648, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.32.self_attn": [ + { + "accuracy": 0.9771077632904053, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801676853706962, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820572467226731, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858201161811226, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889504909515381, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989287152102119, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99294534482454, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933287846414667, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940013803149524, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994268151098176, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945689341739604, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950827340546408, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995343530648633, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957615188077876, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974220497043509, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978689153335596, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980924625537897, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989182126561278, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994801843146744, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.mlp": [ + { + "accuracy": 0.9618346408793801, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.963058989298971, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692066280465377, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713609344080875, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803612545916909, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821057601978904, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848881740319101, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894185905393801, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904563152476361, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989961608460075, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912980410613512, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99487436053, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956539943814278, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971113851980159, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974119780879271, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979956597089767, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992035170526881, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.33.self_attn": [ + { + "accuracy": 0.9778875645838285, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789611759938692, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812273257657101, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848252221157676, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885848125344828, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886686645056072, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992765569373181, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993039962492491, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939407506271413, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944476187229156, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994511470590767, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948755490936732, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953672227106596, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957839583880023, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975153009750342, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978795549587199, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982627818458959, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998904222132344, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995284420193026, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.mlp": [ + { + "accuracy": 0.9594143221252843, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606910473422, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670229240467674, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9692814726578561, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791214466094971, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809690933478507, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838441014289856, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887657879214538, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898700659212313, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989339670852611, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907559532868234, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945600436706292, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953847503975818, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969458327089485, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972526805573388, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978550701941314, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991560720495487, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.34.self_attn": [ + { + "accuracy": 0.9745561041330036, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760926726617312, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783745925677451, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981266482880241, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876216822548917, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879313597553655, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929194975840417, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933237504017981, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937058800929471, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939479745532337, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940078740841464, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944605909680065, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995180711934441, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995605856180191, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974273641250635, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978385881373757, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981488124712518, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989064504441462, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995089027246362, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.mlp": [ + { + "accuracy": 0.955908013017554, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9572666507018239, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9641004392975255, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665658693564565, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772986455967552, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9793110113394888, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.982447475194931, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877768951027017, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889694954219618, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883872105887062, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989942269889932, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940662395797277, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949771581511748, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966583408807453, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969965733195606, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976433315559438, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990636831835696, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.35.self_attn": [ + { + "accuracy": 0.9719538500434474, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748912735989219, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976899841898366, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818210962571596, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861271695086831, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864853730327204, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915478872625452, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919671855474773, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925813431802549, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993067196325252, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931079619809201, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937836084710924, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943432713809767, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947805467404818, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969136081635952, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973990421153998, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978583151180493, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986277703980082, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994177266367172, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.mlp": [ + { + "accuracy": 0.9517045993553965, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9531842219202142, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9605689331104881, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9632162890936199, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751748944583692, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772852831765225, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807038385617105, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866984000331477, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879809719951529, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873457637272383, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889811677368063, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935586421113265, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945078617648074, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964100781239962, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967527440504024, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974606329281079, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990111572765991, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.36.self_attn": [ + { + "accuracy": 0.9728055063046908, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734300845547726, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767786734982541, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805998911983088, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857611060142517, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862912689384661, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909030732355619, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916455628056275, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923783321129648, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929406423317758, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930601190579565, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936309237229196, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943164045873442, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950322034327608, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968631379306316, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974733845967996, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977245991559405, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986704739889032, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993552576358381, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.mlp": [ + { + "accuracy": 0.9488294689278853, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.950352176239616, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579709266361437, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606156317811263, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737173209064885, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759182804509213, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979437065751929, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860052011514965, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873322381785041, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866071578703428, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883180872390145, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993182006635164, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941778033971786, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962214082479477, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965555236527794, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972710111423543, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989568463673717, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.37.self_attn": [ + { + "accuracy": 0.9716977477073669, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736415994794745, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763497192608682, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.980781980251011, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862824082374573, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864882920917711, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909532321126837, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913738699335801, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922210652577249, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926208759609022, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931778492111909, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937501373259645, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942788599353087, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948175016202425, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968239096434492, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973990595654437, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976390637457371, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985717043868805, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993513182393814, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.mlp": [ + { + "accuracy": 0.946040153503418, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9476107704012018, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9554226272984555, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9581046229914615, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9723093697899267, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745676956678692, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978181431168004, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853382988979942, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867148901286878, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858800157120353, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876691855882344, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928156717827445, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938572198152542, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960586663923765, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963637453160787, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997103030548284, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989282428041885, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.38.self_attn": [ + { + "accuracy": 0.9742084302400288, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752293636924342, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776393457462913, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818249407567476, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869565634351027, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873386441092742, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918267570043865, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923437731830698, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929461165478355, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931785330960625, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935611913862982, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940654733463338, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945383067978056, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949723348805779, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969343762648734, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974931762798837, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977911500946471, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998654638760184, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994069555970398, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.mlp": [ + { + "accuracy": 0.9427335199556852, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9443703513396413, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9523798133197584, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9551271388405248, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9705450723045751, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972954483408677, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976673949705927, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844144472950384, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858856467824233, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849913465349298, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868815847133335, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923648191125769, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934670823185068, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958136493438169, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961339960757055, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968852057660881, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988625250560673, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.39.self_attn": [ + { + "accuracy": 0.9720487908313149, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733522816708213, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.976446702292091, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809985380423697, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858064667174691, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.986013545801765, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911869983924063, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916178270390159, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924856502758829, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928537481709531, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930955351967561, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935359041157522, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942160554622349, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947945479499666, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967906292723981, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973974263197497, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977167738895667, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998581919034845, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993633455841949, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9224686685361361, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.944839828892758, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.948503055070576, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488140030911094, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883775593418824, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869611490713922, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873604209799516, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928163509619864, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944545971719843, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961608848289439, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970465692642488, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980313913210442, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998381237156297, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983190952947265, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989443107655174, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990969755147633, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992591800952428, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9430848171836451, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9476992456536544, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9512751949460883, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711446667972364, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.972278262439527, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9725656760366339, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841436320229581, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844352900981903, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.985526807998356, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863862630568052, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860132838550367, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869593334825415, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877611641821108, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887379901973825, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932709351966256, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943523003082526, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958510355729806, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969995198281187, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988893508715065, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.40.mlp": [ + { + "accuracy": 0.9406878258052626, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9423651130575883, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9504387849255612, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9531985709541723, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9695020914077759, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9719880129161634, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757180213928223, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838769984872717, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853959177669726, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844552921621423, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864074509394797, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992093803851228, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932289045107993, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956685034068007, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959958070202878, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967375019271123, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988230230580819, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.40.self_attn": [ + { + "accuracy": 0.9732305344782377, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743271344586423, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779821195100483, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812164698776445, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864453265541478, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870097848929857, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908971347306904, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914479247833553, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927351051255277, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.993086712140786, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933371841907501, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994093549094702, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994529128074646, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952040471528706, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969319703155443, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975980779058055, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975549204176978, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986721550751674, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993334807651607, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.41.mlp": [ + { + "accuracy": 0.9383808688113564, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9400748139933536, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481898452106275, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9509035756713465, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682554633993852, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9708834510100516, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.974627108950364, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833093065964548, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848821069064894, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838006559171175, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858570067506087, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917474545930561, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929516527213549, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955024507484938, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958044831690035, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965390645359692, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987815098935052, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.41.self_attn": [ + { + "accuracy": 0.9730099000428852, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743973126536921, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.978019002236818, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814679026603699, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862165796129327, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867226559864847, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906932131240243, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913623929023743, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925990779148904, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928719503314871, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932593687584526, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940001807714763, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943652949050853, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951327599977192, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969514071157104, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975982396618316, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976563365443757, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998699779181104, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993468280391473, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.42.mlp": [ + { + "accuracy": 0.9357953259819433, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.937526313882125, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.945706373766849, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.948469064737621, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668696585454439, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696617691140426, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734235500034533, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825375707525956, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842075799640856, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98305449674004, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852405535547357, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913565100807893, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926402968795676, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952769765728399, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955986777418538, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963357193689597, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987146721074456, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.42.self_attn": [ + { + "accuracy": 0.9729985688862047, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747207149078971, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777972619784506, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807774393182052, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865552186965942, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871163619192023, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990258863097743, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990721618658618, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920699917956403, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925843485091862, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935352370927208, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943228140473366, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943782989131776, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995185165028823, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969061809150797, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976035772185576, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974769775412584, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998654715226669, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999307501178823, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.43.mlp": [ + { + "accuracy": 0.9333826679932444, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9352194133557772, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9435263307471025, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9463728666305542, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655823895805761, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9685275617398714, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9723564982414246, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817920938918465, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835440124336042, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824042100655405, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846816737400857, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910151017339606, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923580092819113, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950699221931005, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954263450283753, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961708490001527, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986562672022142, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.43.self_attn": [ + { + "accuracy": 0.9690522175086173, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709687860388505, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9738449297453228, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781228084313242, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843062287882755, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846952510507483, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9891633987426758, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896047225123957, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905100213853937, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.991079087320127, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920853815580669, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992866584344914, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933566035408723, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940585960683069, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962484569926011, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970545435422346, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970168130178201, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982354682134954, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999208410515597, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.44.mlp": [ + { + "accuracy": 0.924536943435669, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9265484935358951, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9359960116838154, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9393423670216611, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606206824904994, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9641900815461811, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686757075159174, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9786915685001173, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811431451847679, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.979848115067733, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825298974388524, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989718455233072, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912705154795396, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943063721845025, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947592286687148, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956296607851982, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983815210626313, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.44.self_attn": [ + { + "accuracy": 0.9670546650886536, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687775373458862, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9723382341234308, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775013939330452, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823716643609499, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830162211468345, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870090515990007, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877967042358298, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894466447202783, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990244376031976, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912512859231547, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923943218431974, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921374815075021, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934476872808055, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955509967709842, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967074107966925, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963423660711238, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980769751495436, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990246738062093, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.45.mlp": [ + { + "accuracy": 0.9201352533541227, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9223182452352423, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9322127354772467, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9357832419244867, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9583628271755419, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9621489204858479, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668306457368951, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776852789678072, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799318533194693, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785986950522975, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814707150584773, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890229600040537, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907097957636181, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938804315109002, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944143314894877, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953582612307448, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982764158201846, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.45.self_attn": [ + { + "accuracy": 0.9629747867584229, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9670002680075795, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707284569740295, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759050654737573, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821518628220809, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824756995627755, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866861512786463, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.987092978075931, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885875449368828, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893974771625117, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910909792310313, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920413188244167, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920960421624937, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929787547964799, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956414805431115, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964887876259653, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964903483265325, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979266926254097, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999026622917307, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.46.mlp": [ + { + "accuracy": 0.9013373412584004, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9033729528125963, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9097391178733424, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9122588697232699, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9486566938852009, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9551890868889659, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9585895632442675, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9637917248826278, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9673186854312295, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710871796858939, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774151397378821, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872754576959109, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890937334612796, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909069271464097, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940878384207424, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948891742449057, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971624250082594, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.46.self_attn": [ + { + "accuracy": 0.9570280376233553, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.962700366973877, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675809490053278, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732828673563505, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804562251818808, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811109116202906, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859198378889185, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867551311066276, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878911689708108, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888188972284919, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902946392172262, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912162885854119, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915561519171062, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923627392241829, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952050192575705, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996147974149177, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962206251527134, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977409516118074, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989566147131356, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.47.mlp": [ + { + "accuracy": 0.9219447813535991, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9245148583462364, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9319730808860377, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9349279905620375, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.960104807427055, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9637373434869867, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9672430377257497, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781940250020278, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802645570353458, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794040824237623, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821669114263434, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.989290053122922, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.990887535245795, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936662037905893, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.994539271451925, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953364549498809, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979675604324592, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.47.self_attn": [ + { + "accuracy": 0.975297074568899, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770933170067636, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.981149993444744, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845861886676989, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876344227477124, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876427893575869, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903529435396194, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904165111090008, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918272895248312, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917566266499067, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933842561746898, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941230518253226, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941030936805826, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945932963961049, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964308566168735, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970794625972447, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969137734488437, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986132393149953, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991445762938574, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9900266771253786, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903227506499541, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917065760022715, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.992506845217002, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948627705636778, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951657637169486, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995946871999063, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970836127667051, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970214221822588, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972689975249139, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977754730927316, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986458348208352, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980809094482347, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991951770590324, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992683768566502, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994683378915253, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996362974573123, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9935977874617827, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939872018600765, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944138879838743, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963194265177375, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968443010982714, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968930667168215, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980907648017532, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981499192746062, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982876865879485, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983892864302585, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.99841462291385, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985219970541565, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998588219873215, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998704615588251, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992280234827807, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993535587073941, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994980457091802, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996552232064699, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998658823560139, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9890765775191156, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894203237797085, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907766729593277, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914547448095522, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945968816939154, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950468830372158, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958033608762842, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969666584541923, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970918495796228, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997294689871763, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975841110082049, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985646761365627, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986668901616021, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990995450827637, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992727415734216, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994612368019787, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997107081458365, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9938888400793076, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943094818215621, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947808510378787, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964115133411006, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969278174011331, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996990332674039, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981141319792521, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981878774338647, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983550529730948, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984511171320551, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984666330641822, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985827435984423, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986597195659813, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987813102963724, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992635107942318, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993858601113683, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995142129298887, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996676244037715, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998674031567613, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9871129699443516, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875462180689761, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892917620508295, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.98998892072, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935531184861535, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941302039905598, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949505976156184, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964692710261596, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968179285918412, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967125865973925, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971529570849318, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983147421949788, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985754475193588, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990339856594801, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999149610533526, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993445341426291, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9997285427280554, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.993300567332067, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936822888098265, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942917204216907, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.995950206329948, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966202660610801, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966980662000807, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.997965936401957, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980728787026907, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982354009622022, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983238864101862, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983206522700033, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984488783306197, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985495352823484, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986789961786646, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992001035103673, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993376263760423, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994698224197093, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996533499736535, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998560478224566, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9836812584023726, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842263962093153, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867637988768125, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9876056760549545, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917448029706353, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924890963654769, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935630454044593, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955802755920511, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.996010519172016, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957702904939651, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963463850711521, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978355051655519, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998174771274391, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987882436498215, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989007396721526, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991494345625764, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999667048723878, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9935418457577103, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940023943781853, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945366151238743, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960137672330204, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967837257212714, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968609233435831, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979302900794306, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980400537973956, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981725698238925, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982753650922525, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983943171407047, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985151184036544, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986006968507641, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987237749523238, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992175720828144, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.999354338469474, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994493834184188, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996441915190142, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998488181001065, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9808039680907601, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 474871952, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9814254170969913, + "down_proj": { + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64, + "6": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 492566672, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846912136203364, + "down_proj": { + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "3": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 549465856, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857194596215298, + "down_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 616640256, + "up_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902646651393489, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 128, + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 694777456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911121302529385, + "down_proj": { + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "3": 32, + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 714362112, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925225714319631, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 768025456, + "up_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "3": 32, + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948966907043206, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 878169328, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953913410243235, + "down_proj": { + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "4": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 891112704, + "up_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950304266653562, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 128, + "5": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 903509616, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956825359871513, + "down_proj": { + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "4": 32, + "5": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 923094272, + "up_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974640494114474, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 128, + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1112241776, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978443339074913, + "down_proj": { + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "group_size": { + "5": 32, + "6": 32, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 1131826432, + "up_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986199844432505, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "6": 128, + "8": 32 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1288992368, + "up_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987060174738106, + "down_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1332028224, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990121654577946, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1451303744, + "up_proj": { + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "6": 128, + "8": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996327847046288, + "down_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "gate_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 1706107712, + "up_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.99125248742731, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 133692416, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917744664769423, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 137624576, + "v_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924863231809515, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 141915264, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947295247724182, + "k_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "2": 64, + "3": 64 + }, + "scale_bits": 4 + }, + "total_bits": 167308544, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955142693299996, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197778944, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956145290481416, + "k_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 128, + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 197944704, + "v_proj": { + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "3": 64, + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972813778409833, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254402048, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974209146672174, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 128 + }, + "scale_bits": 4 + }, + "total_bits": 254567808, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976657915272211, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 64 + }, + "scale_bits": 4 + }, + "total_bits": 256707328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978148362746364, + "k_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + }, + "total_bits": 260323328, + "v_proj": { + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "4": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977854505965584, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 128, + "5": 128 + }, + "scale_bits": 4 + }, + "total_bits": 260859264, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979477085565266, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 262998784, + "v_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980849698185921, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 64, + "5": 64 + }, + "scale_bits": 4 + }, + "total_bits": 267385856, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 64 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.998264487048513, + "k_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "4": 32, + "5": 32 + }, + "scale_bits": 4 + }, + "total_bits": 271333376, + "v_proj": { + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "5": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989373001613115, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 128, + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 328326656, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991253811473909, + "k_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "group_size": { + "5": 32, + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 334247936, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992955510847663, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + }, + "total_bits": 380231168, + "v_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 128 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995395365220151, + "k_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "6": 32 + }, + "scale_bits": 4 + }, + "total_bits": 396638208, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 32 + }, + "scale_bits": 4 + } + }, + { + "accuracy": 0.9998091407193753, + "k_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "o_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "q_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + }, + "total_bits": 506060288, + "v_proj": { + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "group_size": { + "8": 128 + }, + "scale_bits": 4 + } + } + ], + "model.norm.norm": null + } +} \ No newline at end of file