muhammadsalmanalfaridzi committed on
Commit
792db51
·
verified ·
1 Parent(s): e72f60f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +69 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import base64
4
+ import json
5
+ import gradio as gr
6
+ from PIL import Image, ImageDraw, ImageFont
7
+
8
# Runtime configuration comes from environment variables (set them as secrets
# in the Hugging Face Space). Any variable that is not set resolves to None.
API_KEY = os.getenv("API_KEY")
OCR_API_URL = os.getenv("OCR_API_URL")
OCR_TASK = os.getenv("OCR_TASK")
OCR_FUNCTION_NAME = os.getenv("OCR_FUNCTION_NAME")
13
+
14
def call_ocr(image_path: str) -> "tuple[Image.Image, str]":
    """Send an image to the configured OCR endpoint and annotate the result.

    The image file is base64-encoded and POSTed as JSON to ``OCR_API_URL``
    together with ``OCR_TASK`` and ``OCR_FUNCTION_NAME``. The response is read
    as ``{"data": {<OCR_TASK>: {"quad_boxes": [...], "labels": [...]}}}``,
    where each quad box is a flat ``[x1, y1, x2, y2, ...]`` coordinate list.

    Args:
        image_path: Filesystem path of the image to analyse.

    Returns:
        A ``(image, text)`` tuple: an RGB copy of the input with every
        detected region outlined in red and captioned in blue, and the
        cleaned labels joined with newlines.

    Raises:
        gr.Error: If no image was supplied or required configuration is unset.
        requests.RequestException: If the HTTP call fails, times out, or
            returns an error status.
    """
    if not image_path:
        # Gradio hands the function None when nothing was uploaded; fail with
        # a readable message instead of a TypeError from open(None).
        raise gr.Error("Please upload an image first.")

    required = {
        "API_KEY": API_KEY,
        "OCR_API_URL": OCR_API_URL,
        "OCR_TASK": OCR_TASK,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise gr.Error(
            "OCR service is not configured; missing: " + ", ".join(missing)
        )

    # Read and encode the image to base64
    with open(image_path, "rb") as image_file:
        base64_string = base64.b64encode(image_file.read()).decode("utf-8")

    payload = {
        "image": base64_string,
        "task": OCR_TASK,
        "function_name": OCR_FUNCTION_NAME,
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Authorization": f"Basic {API_KEY}",
    }

    # requests never times out by default; bound the wait so a stalled
    # endpoint cannot block the worker indefinitely.
    response = requests.post(
        OCR_API_URL, json=payload, headers=headers, timeout=60
    )
    response.raise_for_status()
    response_json = response.json()

    # `or {}` / `or []` also cover keys that are present but null.
    data = (response_json.get("data") or {}).get(OCR_TASK) or {}
    quad_boxes = data.get("quad_boxes") or []
    labels = data.get("labels") or []

    # Strip the end-of-sequence marker the service appends to labels.
    cleaned_labels = [label.replace("</s>", "").strip() for label in labels]

    # Close the source file promptly and normalise to RGB so the red/blue
    # annotations render the same for palette, grayscale and RGBA inputs.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    draw = ImageDraw.Draw(image)

    # Prefer a TrueType font; Arial is usually absent on Linux hosts, so also
    # try DejaVu Sans. None makes Pillow fall back to its built-in font.
    font = None
    for font_name in ("arial.ttf", "DejaVuSans.ttf"):
        try:
            font = ImageFont.truetype(font_name, 16)
        except OSError:
            continue
        break

    for quad, label in zip(quad_boxes, cleaned_labels):
        # Pair up x/y values; a dangling odd coordinate is ignored rather
        # than raising IndexError.
        polygon = list(zip(quad[0::2], quad[1::2]))
        if len(polygon) < 2:
            continue  # Too few points to draw an outline.
        draw.polygon(polygon, outline="red", width=2)
        draw.text(polygon[0], label, fill="blue", font=font)

    output_labels = "\n".join(cleaned_labels)
    return image, output_labels
60
+
61
# Gradio UI: a single image upload (passed to call_ocr as a file path) that
# produces the annotated image plus the recognised text.
iface = gr.Interface(
    title="Optical Character Recognition (OCR)",
    fn=call_ocr,
    inputs=gr.Image(label="Upload Image", type="filepath"),
    outputs=[
        gr.Image(label="Annotated Image"),
        gr.Textbox(label="Detected Labels"),
    ],
)

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ groq
3
+ Pillow
4
+ requests