Spaces:

breadlicker45
/

PaliGemma2

Sleeping

App Files Files Community

breadlicker45 committed on Dec 15, 2024

Commit

c91d9f3

verified ·

1 Parent(s): 095675c

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import gradio as gr
+from transformers import AutoProcessor, AutoModelForImageTextToText
+from PIL import Image
+import torch
+import os
+import spaces  # Import the spaces module
+def load_model():
+    """Load PaliGemma2 model and processor with Hugging Face token."""
+    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve token from environment variable
+    if not token:
+        raise ValueError(
+            "Hugging Face API token not found. Please set it in the environment variables."
+        )
+    # Load the processor and model using the correct identifier
+    processor = AutoProcessor.from_pretrained(
+        "google/paligemma2-3b-pt-224", use_auth_token=token
+    )
+    model = AutoModelForImageTextToText.from_pretrained(
+        "google/paligemma2-3b-pt-224", use_auth_token=token
+    )
+    return processor, model
+@spaces.GPU  # Decorate the function that uses the GPU
+def process_image(image):
+    """Extract text from image using PaliGemma2."""
+    processor, model = load_model()
+    # Preprocess the image
+    inputs = processor(images=image, return_tensors="pt")
+    # Generate predictions
+    with torch.no_grad():
+        generated_ids = model.generate(**inputs)
+        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return text
+if __name__ == "__main__":
+    iface = gr.Interface(
+        fn=process_image,
+        inputs=gr.Image(type="pil", label="Upload an image containing text"),
+        outputs=gr.Textbox(label="Extracted Text"),
+        title="Text Reading from Images using PaliGemma2",
+        description="Upload an image containing text and the model will extract the text.",
+    )
+    iface.launch()