1inkusFace committed (verified)
Commit 01293f8 · Parent(s): 9b2f6e3

Update app.py

Files changed (1): app.py (+2 -3)
app.py CHANGED
@@ -19,10 +19,9 @@ os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1')
 # ## GGUF MOD: BitsAndBytesConfig is no longer needed.
 from ctransformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

 # ## GGUF MOD: PyTorch backend settings are not used by ctransformers.
-# torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cuda.matmul.allow_tf32 = True
 # ... (rest of torch settings removed for clarity)

 # --- Model and Tokenizer Configuration ---
@@ -47,8 +46,8 @@ print("Loading GGUF model...")
 # leading to much faster inference. Adjust this number based on your VRAM.
 # - hf=True: This tells ctransformers to download from the Hugging Face Hub.

-@spaces.GPU(require=True)
 def loadModel():
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     model = AutoModelForCausalLM.from_pretrained(
         model_repo_id,
         model_file=model_file,
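
For context, a minimal sketch of the ctransformers GGUF load path that this commit adjusts. The repo id, file name, and gpu_layers value below are placeholder assumptions for illustration, not values taken from app.py:

# Minimal sketch, assuming placeholder repo/file names and gpu_layers count.
from ctransformers import AutoModelForCausalLM

model_repo_id = "TheBloke/Llama-2-7B-Chat-GGUF"  # hypothetical repo id
model_file = "llama-2-7b-chat.Q4_K_M.gguf"       # hypothetical GGUF file

def loadModel():
    # gpu_layers offloads that many transformer layers to the GPU
    # (tune to available VRAM); hf=True makes ctransformers return a
    # transformers-compatible model object.
    model = AutoModelForCausalLM.from_pretrained(
        model_repo_id,
        model_file=model_file,
        gpu_layers=50,
        hf=True,
    )
    return model

With hf=True the returned model exposes a transformers-compatible interface, which is consistent with app.py keeping its AutoTokenizer import from transformers.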