Update app.py
app.py CHANGED
```diff
@@ -13,19 +13,22 @@ DEPLOY_MODELS = {}
 IMAGE_TOKEN = "<image>"
 
 # Fetch model
-def fetch_model(model_name: str, dtype=
+def fetch_model(model_name: str, dtype=torch.bfloat16):
     global DEPLOY_MODELS
     if model_name not in DEPLOY_MODELS:
-
-        # Use bfloat16 only if using GPU
-        dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-
-        logger.info(f"Loading {model_name} on {device} with dtype={dtype}...")
+        logger.info(f"Loading {model_name}...")
         model_info = load_model(model_name, dtype=dtype)
         tokenizer, model, vl_chat_processor = model_info
-
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        try:
+            model = model.to(device)
+        except RuntimeError as e:
+            logger.warning(f"Could not move model to {device}: {e}")
+            device = torch.device('cpu')
+            model = model.to(device)
+            logger.warning("Model fallback to CPU. Inference might be slow.")
         DEPLOY_MODELS[model_name] = (tokenizer, model, vl_chat_processor)
-        logger.info(f"Loaded {model_name}
+        logger.info(f"Loaded {model_name} on {device}")
     return DEPLOY_MODELS[model_name]
 
 
```
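For context, the pattern this commit introduces is best-effort GPU placement with a CPU fallback around a cached model load. The sketch below isolates that logic under stated assumptions: `place_on_best_device` is a hypothetical helper name (not part of app.py), and a `torch.nn.Linear` stands in for the real model that `load_model` returns.

```python
import logging

import torch

logger = logging.getLogger(__name__)


def place_on_best_device(model: torch.nn.Module) -> torch.device:
    # Prefer CUDA when available; otherwise start on CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    try:
        # .to() can raise RuntimeError, e.g. when the GPU runs out of memory.
        model = model.to(device)
    except RuntimeError as e:
        logger.warning(f"Could not move model to {device}: {e}")
        device = torch.device('cpu')
        model = model.to(device)
        logger.warning("Model fallback to CPU. Inference might be slow.")
    return device


# Hypothetical stand-in for the model returned by load_model(...).
model = torch.nn.Linear(8, 8)
print(place_on_best_device(model))
```

Catching RuntimeError also covers GPU out-of-memory failures, since torch.cuda.OutOfMemoryError is a RuntimeError subclass. One side effect visible in the diff: the removed code dropped to float32 on CPU, while the new path keeps the bfloat16 default even after falling back.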