Pretam1 committed on
Commit
4f343b0
·
verified ·
1 Parent(s): 319d3d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -22,6 +22,9 @@ def predict_caption(image):
22
  if image.mode != "RGB":
23
  image = image.convert(mode="RGB")
24
 
 
 
 
25
  # Process image with padding enabled to handle batched tensor conversion
26
  pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)
27
 
 
22
  if image.mode != "RGB":
23
  image = image.convert(mode="RGB")
24
 
25
+ # Resize image to a fixed size (ViT typically requires 224x224 or 384x384)
26
+ image = image.resize((384, 384)) # Resize to 384x384 for ViT
27
+
28
  # Process image with padding enabled to handle batched tensor conversion
29
  pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)
30