breadlicker45 committed on
Commit
c91d9f3
·
verified ·
1 Parent(s): 095675c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForImageTextToText
3
+ from PIL import Image
4
+ import torch
5
+ import os
6
+ import spaces # Import the spaces module
7
+
8
+
9
def load_model():
    """Load the PaliGemma2 processor and model using a Hugging Face token.

    The token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable. After the first successful load, the ``(processor, model)``
    pair is stored on the function object and returned as-is on later calls
    in the same process, so the checkpoint is not re-instantiated per call.

    Returns:
        tuple: ``(processor, model)`` for ``google/paligemma2-3b-pt-224``.

    Raises:
        ValueError: If the environment variable is unset or empty.
    """
    cached = getattr(load_model, "_cached", None)
    if cached is not None:
        return cached

    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise ValueError(
            "Hugging Face API token not found. Please set it in the environment variables."
        )

    model_id = "google/paligemma2-3b-pt-224"

    # `token=` is the current keyword; `use_auth_token=` is deprecated in
    # transformers and no longer accepted by newer releases.
    processor = AutoProcessor.from_pretrained(model_id, token=token)
    model = AutoModelForImageTextToText.from_pretrained(model_id, token=token)

    load_model._cached = (processor, model)
    return load_model._cached
28
+
29
+
30
@spaces.GPU  # Decorate the function that uses the GPU
def process_image(image):
    """Run PaliGemma2 on an uploaded image and return the decoded text.

    Args:
        image: The image handed over by the Gradio input component (the UI
            in this file configures it as a PIL image). ``None`` when the
            user submits without uploading anything.

    Returns:
        str: The first decoded sequence produced by ``model.generate``,
        with special tokens stripped.

    Raises:
        gr.Error: If no image was provided.
        ValueError: Propagated from ``load_model`` when the access token is
            missing.
    """
    # Gradio passes None for an empty image input; fail with a readable
    # message instead of an opaque error from inside the processor.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    processor, model = load_model()

    # Use the GPU when one is visible to torch; otherwise stay on CPU.
    # Model and inputs must live on the same device for generate().
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Only the image is passed, with no text prompt.
    # NOTE(review): confirm the processor accepts image-only input for this
    # checkpoint in the pinned transformers version.
    inputs = processor(images=image, return_tensors="pt").to(device)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)

    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
44
+
45
+
46
if __name__ == "__main__":
    # Build the input and output widgets first, then wire them to the handler.
    image_input = gr.Image(type="pil", label="Upload an image containing text")
    text_output = gr.Textbox(label="Extracted Text")

    demo = gr.Interface(
        fn=process_image,
        inputs=image_input,
        outputs=text_output,
        title="Text Reading from Images using PaliGemma2",
        description="Upload an image containing text and the model will extract the text.",
    )

    # Start the Gradio server with its default settings.
    demo.launch()