import gradio as gr
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from PIL import Image
import torch
import os

# Load the model and processor
model_id = "google/paligemma-3b-mix-224"
HF_TOKEN = os.getenv('HF_TOKEN')
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=HF_TOKEN).eval()
processor = AutoProcessor.from_pretrained(model_id, token=HF_TOKEN)


def generate_caption(image, prompt="What is in this image?", max_tokens=100):
    """Generate an image description for the given image and prompt."""
    if image is None:
        return "Please upload an image."

    # Update UI to show processing
    gr.Info("Analysis starting. This may take up to 119 seconds.")

    # Modify prompt to include the image token expected by the processor
    full_prompt = "<image> " + prompt

    # Preprocess inputs
    model_inputs = processor(text=full_prompt, images=image, return_tensors="pt")
    input_len = model_inputs["input_ids"].shape[-1]

    # Generate caption and decode only the newly generated tokens
    with torch.inference_mode():
        generation = model.generate(**model_inputs, max_new_tokens=max_tokens, do_sample=False)
        generation = generation[0][input_len:]
        decoded = processor.decode(generation, skip_special_tokens=True)

    return decoded


def load_local_images():
    """Load example images bundled with the repository."""
    image_files = ['image1.jpg', 'image2.jpg', 'image3.jpg']
    local_images = []
    for img_file in image_files:
        try:
            img_path = os.path.join('.', img_file)
            if os.path.exists(img_path):
                local_images.append(Image.open(img_path))
        except Exception as e:
            print(f"Could not load {img_file}: {e}")
    return local_images


# Prepare example images
EXAMPLE_IMAGES = load_local_images()

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PaliGemma Image Analysis")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload or Select Image")
            custom_prompt = gr.Textbox(label="Custom Prompt", value="What is in this image?")
            submit_btn = gr.Button("Analyze Image")
        with gr.Column():
            output_text = gr.Textbox(label="Image Description")

    # Connect components
    submit_btn.click(
        fn=generate_caption,
        inputs=[input_image, custom_prompt],
        outputs=output_text
    )

    # Add example images
    gr.Examples(
        examples=[[img, "What is in this image?"] for img in EXAMPLE_IMAGES],
        inputs=[input_image, custom_prompt],
        fn=generate_caption,
        outputs=output_text
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()