Darius Morawiec committed
Commit bfa4ccc · 1 Parent(s): a84c724

Implement AutoModel class for dynamic model loading and refactor model initialization

Files changed (1):
  1. app.py +26 -19
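The AutoModel wrapper introduced here mirrors the dispatch style of transformers' own Auto classes: it maps a model-ID prefix to the matching concrete Qwen-VL class. Switching from torch_dtype=torch.bfloat16 to dtype="auto" lets each checkpoint load in the precision recorded in its own config rather than forcing bfloat16. A minimal usage sketch of the new wrapper (the example model ID and the explicit GPU move are illustrative assumptions, not part of this commit; AutoModel is the class defined in app.py below):

import torch
from transformers import AutoProcessor

model_id = "Qwen/Qwen2.5-VL-7B-Instruct"  # hypothetical example ID

# Download and materialize the weights on CPU so the load itself
# does not consume CUDA memory.
model = AutoModel.from_pretrained(model_id, dtype="auto", device_map="cpu")
processor = AutoProcessor.from_pretrained(model_id)

# Move to GPU only when inference actually runs (an assumption about
# how the demo's generate() uses the model).
if torch.cuda.is_available():
    model = model.to("cuda")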
app.py CHANGED

@@ -46,6 +46,27 @@ MODEL_IDS = [
     "Qwen/Qwen3-VL-32B-Instruct", # https://huggingface.co/Qwen/Qwen3-VL-32B-Instruct
 ]
 
+# Global variables to track loaded model
+current_model = None
+current_processor = None
+current_model_id = None
+
+
+class AutoModel:
+    @staticmethod
+    def from_pretrained(model_id, dtype="auto", device_map="cpu"):
+        if model_id.startswith("Qwen/Qwen2-VL"):
+            model_loader = Qwen2VLForConditionalGeneration
+        elif model_id.startswith("Qwen/Qwen2.5-VL"):
+            model_loader = Qwen2_5_VLForConditionalGeneration
+        elif model_id.startswith("Qwen/Qwen3-VL"):
+            model_loader = Qwen3VLForConditionalGeneration
+        else:
+            raise ValueError(f"Unsupported model ID: {model_id}")
+        return model_loader.from_pretrained(
+            model_id, dtype=dtype, device_map=device_map
+        )
+
 
 def resize_image(image, target_size=1000):
     width, height = image.size
@@ -149,11 +170,6 @@ with gr.Blocks() as demo:
     with gr.Row():
         run_button = gr.Button("Run")
 
-    # Global variables to track loaded model
-    current_model = None
-    current_processor = None
-    current_model_id = None
-
     def load_model(model_id: str):
         global current_model, current_processor, current_model_id
 
@@ -170,23 +186,12 @@ with gr.Blocks() as demo:
 
         # Force garbage collection and clear CUDA cache
         gc.collect()
-        torch.cuda.empty_cache()
-
         if torch.cuda.is_available():
+            torch.cuda.empty_cache()
             torch.cuda.synchronize()
 
-        # Load new model
-        model_loader = None
-        if model_id.startswith("Qwen/Qwen2-VL"):
-            model_loader = Qwen2VLForConditionalGeneration
-        elif model_id.startswith("Qwen/Qwen2.5-VL"):
-            model_loader = Qwen2_5_VLForConditionalGeneration
-        elif model_id.startswith("Qwen/Qwen3-VL"):
-            model_loader = Qwen3VLForConditionalGeneration
-        assert model_loader is not None, f"Unsupported model ID: {model_id}"
-        # Load model on CPU to avoid using CUDA resources during download
-        current_model = model_loader.from_pretrained(
-            model_id, torch_dtype=torch.bfloat16, device_map="cpu"
+        current_model = AutoModel.from_pretrained(
+            model_id, dtype="auto", device_map="cpu"
         )
         current_processor = AutoProcessor.from_pretrained(model_id)
         current_model_id = model_id
@@ -290,8 +295,10 @@ with gr.Blocks() as demo:
         image_resize: str = "Yes",
         image_target_size: int | None = None,
     ):
+        # Load the model and processor on CPU
         model, processor = load_model(model_id)
 
+        # Run inference on GPU (if available)
         return generate(
             model,
             processor,
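A note on the memory-management change in load_model: torch.cuda.empty_cache() now runs only under the torch.cuda.is_available() guard, so switching models no longer calls into CUDA on machines without it (where that call can fail). A condensed sketch of the resulting teardown order (the free_current_model name is hypothetical; the reference-dropping step is implied by load_model's global reassignment, which this diff's context does not show):

import gc
import torch

def free_current_model():
    global current_model, current_processor
    # Drop Python references first so gc can reclaim the old weights.
    current_model = None
    current_processor = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # return cached blocks to the CUDA driver
        torch.cuda.synchronize()  # wait for pending kernels to finish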