Spaces:

V-E-D
/

paligamma

Sleeping

ved1beta committed on Jan 23

Commit

e0a390e

1 Parent(s): cf83b3d

note

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,8 +15,14 @@ def generate_caption(image, prompt="What is in this image?", max_tokens=100):
     if image is None:
         return "Please upload an image."
     # Preprocess inputs
-    model_inputs = processor(text=prompt, images=image, return_tensors="pt")
     input_len = model_inputs["input_ids"].shape[-1]
     # Generate caption
@@ -30,7 +36,7 @@ def generate_caption(image, prompt="What is in this image?", max_tokens=100):
 # Load local example images
 def load_local_images():
     """Load images from the repository"""
-    image_files = ['image1.jpeg', 'image2.jpg', 'image3.jpeg']
     local_images = []
     for img_file in image_files:
         try:

     if image is None:
         return "Please upload an image."
+    # Update UI to show processing
+    gr.Info("Analysis starting. This may take up to 119 seconds.")
+    # Modify prompt to include image token
+    full_prompt = "<image> " + prompt
     # Preprocess inputs
+    model_inputs = processor(text=full_prompt, images=image, return_tensors="pt")
     input_len = model_inputs["input_ids"].shape[-1]
     # Generate caption
 # Load local example images
 def load_local_images():
     """Load images from the repository"""
+    image_files = ['image1.jpg', 'image2.jpg', 'image3.jpg']
     local_images = []
     for img_file in image_files:
         try: