Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,9 @@ def predict_caption(image):
|
|
22 |
if image.mode != "RGB":
|
23 |
image = image.convert(mode="RGB")
|
24 |
|
|
|
|
|
|
|
25 |
# Process image with padding enabled to handle batched tensor conversion
|
26 |
pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)
|
27 |
|
|
|
22 |
if image.mode != "RGB":
|
23 |
image = image.convert(mode="RGB")
|
24 |
|
25 |
+
# Resize image to a fixed size (ViT typically requires 224x224 or 384x384)
|
26 |
+
image = image.resize((384, 384)) # Resize to 384x384 for ViT
|
27 |
+
|
28 |
# Process image with padding enabled to handle batched tensor conversion
|
29 |
pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)
|
30 |
|