Commit: "8-bitowa kwantyzacja" (8-bit quantization)
Browse files
app.py — CHANGED
@@ -27,6 +27,7 @@ def load_model():
         torch_dtype=torch.bfloat16,
         device_map="auto",
-        low_cpu_mem_usage=True
+        low_cpu_mem_usage=True,
+        load_in_8bit=True
     )
     logger.info("Model loaded successfully")
     model_load_queue.put(model)

(Note: the rendered diff showed `low_cpu_mem_usage=True` as unchanged context with no trailing comma; the added `load_in_8bit=True` argument requires that comma for the call to remain valid Python, so the reconstruction above assumes the commit also added it — verify against the actual commit.)