Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -19,10 +19,9 @@ os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 # ## GGUF MOD: BitsAndBytesConfig is no longer needed.
 from ctransformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 # ## GGUF MOD: PyTorch backend settings are not used by ctransformers.
-
+torch.backends.cuda.matmul.allow_tf32 = True
 # ... (rest of torch settings removed for clarity)
 
 # --- Model and Tokenizer Configuration ---
@@ -47,8 +46,8 @@ print("Loading GGUF model...")
 # leading to much faster inference. Adjust this number based on your VRAM.
 # - hf=True: This tells ctransformers to download from the Hugging Face Hub.
 
-@spaces.GPU(require=True)
 def loadModel():
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     model = AutoModelForCausalLM.from_pretrained(
         model_repo_id,
         model_file=model_file,