Spaces: Running on Zero
Darius Morawiec committed · Commit bfa4ccc · 1 Parent(s): a84c724
Implement AutoModel class for dynamic model loading and refactor model initialization
app.py CHANGED
@@ -46,6 +46,27 @@ MODEL_IDS = [
     "Qwen/Qwen3-VL-32B-Instruct", # https://huggingface.co/Qwen/Qwen3-VL-32B-Instruct
 ]
 
+# Global variables to track loaded model
+current_model = None
+current_processor = None
+current_model_id = None
+
+
+class AutoModel:
+    @staticmethod
+    def from_pretrained(model_id, dtype="auto", device_map="cpu"):
+        if model_id.startswith("Qwen/Qwen2-VL"):
+            model_loader = Qwen2VLForConditionalGeneration
+        elif model_id.startswith("Qwen/Qwen2.5-VL"):
+            model_loader = Qwen2_5_VLForConditionalGeneration
+        elif model_id.startswith("Qwen/Qwen3-VL"):
+            model_loader = Qwen3VLForConditionalGeneration
+        else:
+            raise ValueError(f"Unsupported model ID: {model_id}")
+        return model_loader.from_pretrained(
+            model_id, dtype=dtype, device_map=device_map
+        )
+
 
 def resize_image(image, target_size=1000):
     width, height = image.size
@@ -149,11 +170,6 @@ with gr.Blocks() as demo:
     with gr.Row():
         run_button = gr.Button("Run")
 
-    # Global variables to track loaded model
-    current_model = None
-    current_processor = None
-    current_model_id = None
-
    def load_model(model_id: str):
        global current_model, current_processor, current_model_id
 
@@ -170,23 +186,12 @@ with gr.Blocks() as demo:
 
        # Force garbage collection and clear CUDA cache
        gc.collect()
-       torch.cuda.empty_cache()
-
        if torch.cuda.is_available():
+           torch.cuda.empty_cache()
            torch.cuda.synchronize()
 
-
-
-       if model_id.startswith("Qwen/Qwen2-VL"):
-           model_loader = Qwen2VLForConditionalGeneration
-       elif model_id.startswith("Qwen/Qwen2.5-VL"):
-           model_loader = Qwen2_5_VLForConditionalGeneration
-       elif model_id.startswith("Qwen/Qwen3-VL"):
-           model_loader = Qwen3VLForConditionalGeneration
-       assert model_loader is not None, f"Unsupported model ID: {model_id}"
-       # Load model on CPU to avoid using CUDA resources during download
-       current_model = model_loader.from_pretrained(
-           model_id, torch_dtype=torch.bfloat16, device_map="cpu"
+       current_model = AutoModel.from_pretrained(
+           model_id, dtype="auto", device_map="cpu"
        )
        current_processor = AutoProcessor.from_pretrained(model_id)
        current_model_id = model_id
@@ -290,8 +295,10 @@ with gr.Blocks() as demo:
        image_resize: str = "Yes",
        image_target_size: int | None = None,
    ):
+       # Load the model and processor on CPU
        model, processor = load_model(model_id)
 
+       # Run inference on GPU (if available)
        return generate(
            model,
            processor,
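Note on the first hunk: this AutoModel is a Space-local dispatcher, not transformers.AutoModel; it only routes a Hub model ID prefix to the matching Qwen loader class. A minimal, hypothetical call site, assuming the imports already present in app.py (the checkpoint ID below is just an example):

    # Illustrative use of the dispatcher added above; any supported
    # Qwen VL checkpoint ID resolves the same way.
    model = AutoModel.from_pretrained(
        "Qwen/Qwen2.5-VL-7B-Instruct",  # example ID -> Qwen2_5_VLForConditionalGeneration
        dtype="auto",
        device_map="cpu",
    )
    processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")

Because the signature mirrors the transformers from_pretrained call, load_model only had to swap the branching loader lookup for a single call.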
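The refactor also replaces the old assert model_loader is not None guard with an explicit raise ValueError. The assert was doubly fragile: it is stripped when Python runs with -O, and when no branch matched, model_loader was never bound, so the check would surface as a NameError rather than the intended message. A small standalone sketch of the difference (LOADER is a hypothetical stand-in for the Qwen classes, not part of app.py):

    LOADER = object()

    def old_guard(model_id):
        if model_id.startswith("Qwen/Qwen2-VL"):
            model_loader = LOADER
        assert model_loader is not None  # NameError if no branch ran; removed under -O
        return model_loader

    def new_guard(model_id):
        if model_id.startswith("Qwen/Qwen2-VL"):
            return LOADER
        raise ValueError(f"Unsupported model ID: {model_id}")  # always fails loudly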
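The comments added in the last hunk (load on CPU, run on GPU) match the ZeroGPU pattern this Space runs on: weights are materialized with device_map="cpu", and a GPU exists only while the decorated inference function executes. A minimal sketch of that split, assuming the spaces package available in ZeroGPU Spaces; this generate is a simplified text-only stand-in for the Space's real function, not its actual implementation:

    import spaces
    import torch

    @spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
    def generate(model, processor, prompt: str) -> str:
        model = model.to("cuda")  # move the CPU-loaded weights onto the leased GPU
        inputs = processor(text=[prompt], return_tensors="pt").to("cuda")
        with torch.no_grad():
            output_ids = model.generate(**inputs, max_new_tokens=256)
        return processor.batch_decode(output_ids, skip_special_tokens=True)[0]

Loading on CPU first keeps the metered GPU lease free during the slow download-and-initialize phase; only the forward pass pays for GPU time.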