{
  "batcher": null,
  "cacher": null,
  "compiler": null,
  "factorizer": null,
  "pruner": null,
  "quantizer": "llm_int8",
  "llm_int8_compute_dtype": "bfloat16",
  "llm_int8_double_quant": false,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_quant_type": "fp4",
  "llm_int8_threshold": 6.0,
  "llm_int8_weight_bits": 8,
  "batch_size": 1,
  "device": "cuda",
  "device_map": null,
  "save_fns": [],
  "load_fns": [
    "transformers"
  ],
  "reapply_after_load": {
    "factorizer": null,
    "pruner": null,
    "quantizer": null,
    "cacher": null,
    "compiler": null,
    "batcher": null
  }
}