Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ class CustomHuggingFaceLLM(LLM):
 llm_int8_enable_fp32_cpu_offload=True # Offload FP32 operations to CPU for further memory savings
 )
 
-self.model = AutoModelForCausalLM.from_pretrained(model_name,quantization_config=quantization_config)
+self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu", quantization_config=quantization_config)
 self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 self.temperature = temperature
 