Update app.py
app.py
@@ -20,11 +20,12 @@ def fetch_model(model_name: str, dtype=torch.bfloat16):
     model_info = load_model(model_name, dtype=dtype)
     tokenizer, model, vl_chat_processor = model_info
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
+    # Avoid .cuda() call from deepseek_vl2 by moving to device here
+    model = model.to(device).eval()  # Move to device and set eval mode
     DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
     logger.info(f"Loaded {model_name} on {device}")
     return DEPLOY_MODELS[model_name]
-
+
 # Generate prompt with history
 def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
     conversation = vl_chat_processor.new_chat_template()
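For context, a minimal sketch of how the patched fetch_model could read as a whole. Only the lines in the hunk above come from the commit; the cache-hit guard and the load_model stub are assumptions added for illustration.

import logging

import torch

logger = logging.getLogger(__name__)
DEPLOY_MODELS = {}  # cache: model_name -> (tokenizer, model, vl_chat_processor)

def load_model(model_name: str, dtype=torch.bfloat16):
    """Stub for app.py's real loader, which sits outside this hunk."""
    raise NotImplementedError

def fetch_model(model_name: str, dtype=torch.bfloat16):
    # Serve from the cache when the model is already loaded (assumed guard;
    # the actual check is above the diffed lines).
    if model_name in DEPLOY_MODELS:
        return DEPLOY_MODELS[model_name]
    model_info = load_model(model_name, dtype=dtype)
    tokenizer, model, vl_chat_processor = model_info
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Avoid .cuda() call from deepseek_vl2 by moving to device here:
    # .to(device) falls back to CPU cleanly, while .cuda() raises when no GPU exists.
    model = model.to(device).eval()  # eval() disables dropout etc. for inference
    DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
    logger.info(f"Loaded {model_name} on {device}")
    return DEPLOY_MODELS[model_name]

The point of the change is the device-neutral move: the model lands on CUDA when available and on CPU otherwise, instead of deepseek_vl2's hard .cuda() call, and .eval() puts it in inference mode before it is cached.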