Update app.py
app.py
@@ -20,11 +20,12 @@ def fetch_model(model_name: str, dtype=torch.bfloat16):
     model_info = load_model(model_name, dtype=dtype)
     tokenizer, model, vl_chat_processor = model_info
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
+    # Avoid .cuda() call from deepseek_vl2 by moving to device here
+    model = model.to(device).eval()  # Move to device and set eval mode
     DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
     logger.info(f"Loaded {model_name} on {device}")
     return DEPLOY_MODELS[model_name]
-
+
 # Generate prompt with history
 def generate_prompt_with_history(text, images, history, vl_chat_processor, tokenizer, max_length=2048):
     conversation = vl_chat_processor.new_chat_template()
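For context, a minimal sketch of how the patched fetch_model could read as a whole. Only the lines in the hunk above come from the commit; the cache-hit guard and the load_model stub are assumptions added for illustration.

import logging

import torch

logger = logging.getLogger(__name__)
DEPLOY_MODELS = {}  # cache: model_name -> (tokenizer, model, vl_chat_processor)

def load_model(model_name: str, dtype=torch.bfloat16):
    """Stub for app.py's real loader, which sits outside this hunk."""
    raise NotImplementedError

def fetch_model(model_name: str, dtype=torch.bfloat16):
    # Serve from the cache when the model is already loaded (assumed guard;
    # the actual check is above the diffed lines).
    if model_name in DEPLOY_MODELS:
        return DEPLOY_MODELS[model_name]
    model_info = load_model(model_name, dtype=dtype)
    tokenizer, model, vl_chat_processor = model_info
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Avoid .cuda() call from deepseek_vl2 by moving to device here:
    # .to(device) falls back to CPU cleanly, while .cuda() raises when no GPU exists.
    model = model.to(device).eval()  # eval() disables dropout etc. for inference
    DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
    logger.info(f"Loaded {model_name} on {device}")
    return DEPLOY_MODELS[model_name]

The point of the change is the device-neutral move: the model lands on CUDA when available and on CPU otherwise, instead of deepseek_vl2's hard .cuda() call, and .eval() puts it in inference mode before it is cached.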