Anuji committed on
Commit
0e619d7
·
verified ·
1 Parent(s): ed392d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -20,11 +20,12 @@ def fetch_model(model_name: str, dtype=torch.bfloat16):
20
  model_info = load_model(model_name, dtype=dtype)
21
  tokenizer, model, vl_chat_processor = model_info
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
- model = model.to(device)
 
24
  DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
25
  logger.info(f"Loaded {model_name} on {device}")
26
  return DEPLOY_MODELS[model_name]
27
-
28
  # Generate prompt with history
29
  def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
30
  conversation = vl_chat_processor.new_chat_template()
 
20
  model_info = load_model(model_name, dtype=dtype)
21
  tokenizer, model, vl_chat_processor = model_info
22
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
23
+ # Avoid .cuda() call from deepseek_vl2 by moving to device here
24
+ model = model.to(device).eval() # Move to device and set eval mode
25
  DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
26
  logger.info(f"Loaded {model_name} on {device}")
27
  return DEPLOY_MODELS[model_name]
28
+
29
  # Generate prompt with history
30
  def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
31
  conversation = vl_chat_processor.new_chat_template()