Schrieffer2sy committed on
Commit
3a01517
·
1 Parent(s): df5f30b
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import torch
3
  from transformers import AutoTokenizer
4
  from sarm_llama import LlamaSARM
@@ -18,7 +19,6 @@ model = LlamaSARM.from_pretrained(
18
  sae_latent_size=65536,
19
  sae_k=192,
20
  device_map="auto", # <<< KEY CHANGE HERE
21
- trust_remote_code=True,
22
  torch_dtype=torch.bfloat16
23
  )
24
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
@@ -28,7 +28,7 @@ DEVICE = model.device
28
  print(f"Model loaded successfully on device: {DEVICE}")
29
 
30
  # --- 2. Define the Inference Function ---
31
-
32
  def get_reward_score(prompt: str, response: str) -> float:
33
  """
34
  Receives a prompt and a response, and returns the reward score calculated by the SARM model.
 
1
  import gradio as gr
2
+ import spaces
3
  import torch
4
  from transformers import AutoTokenizer
5
  from sarm_llama import LlamaSARM
 
19
  sae_latent_size=65536,
20
  sae_k=192,
21
  device_map="auto", # <<< KEY CHANGE HERE
 
22
  torch_dtype=torch.bfloat16
23
  )
24
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
 
28
  print(f"Model loaded successfully on device: {DEVICE}")
29
 
30
  # --- 2. Define the Inference Function ---
31
+ @spaces.GPU
32
  def get_reward_score(prompt: str, response: str) -> float:
33
  """
34
  Receives a prompt and a response, and returns the reward score calculated by the SARM model.