Spaces:

Pretam1
/

Hw9

Sleeping

Pretam1 committed on Nov 7, 2024

Commit

4f343b0

verified ·

1 Parent(s): 319d3d6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,6 +22,9 @@ def predict_caption(image):
     if image.mode != "RGB":
         image = image.convert(mode="RGB")
     # Process image with padding enabled to handle batched tensor conversion
     pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)

     if image.mode != "RGB":
         image = image.convert(mode="RGB")
+    # Resize image to a fixed size (ViT typically requires 224x224 or 384x384)
+    image = image.resize((384, 384))  # Resize to 384x384 for ViT
     # Process image with padding enabled to handle batched tensor conversion
     pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)