Spaces:

Pretam1
/

Hw9

Sleeping

Pretam1 committed on Nov 7, 2024

Commit

319d3d6

verified ·

1 Parent(s): 9b0c2df

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,9 +22,9 @@ def predict_caption(image):
     if image.mode != "RGB":
         image = image.convert(mode="RGB")
-    # Process image and move pixel values to device
-    pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values.to(device)
     # Generate caption
     output_ids = model.generate(pixel_values, **gen_kwargs)
     preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
@@ -52,3 +52,4 @@ def main():
 # Run the application
 if __name__ == "__main__":
     main()

     if image.mode != "RGB":
         image = image.convert(mode="RGB")
+    # Process image with padding enabled to handle batched tensor conversion
+    pixel_values = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values.to(device)
     # Generate caption
     output_ids = model.generate(pixel_values, **gen_kwargs)
     preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
 # Run the application
 if __name__ == "__main__":
     main()