Mo41 committed on
Commit
b5677e9
·
1 Parent(s): 65d4f8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py CHANGED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import json
4
+ import easyocr
5
+ import datasets
6
+ import socket
7
+ import requests
8
+ import keras_ocr
9
+
10
+ import numpy as np
11
+ import gradio as gr
12
+ import pandas as pd
13
+ import tensorflow as tf
14
+ import re as r
15
+
16
+ from PIL import Image
17
+ from datasets import Image
18
+ from datetime import datetime
19
+ from paddleocr import PaddleOCR
20
+ from urllib.request import urlopen
21
+ from huggingface_hub import Repository, upload_file
22
+
23
+ """
24
+ Paddle OCR
25
+ """
26
def ocr_with_paddle(img):
    """Run PaddleOCR (English, angle classifier enabled) on ``img``.

    Args:
        img: The image handed straight to ``PaddleOCR.ocr``.

    Returns:
        The recognised text fragments concatenated, each preceded by a single
        space (so a non-empty result starts with a space), or ``''`` when
        nothing was recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # The original indexed result[0] unconditionally, which raises when the
    # result is empty or its first entry is None.
    # NOTE(review): some PaddleOCR releases reportedly return [None] for
    # images without text -- confirm against the installed version.
    if not result or not result[0]:
        return ''

    # The text of each detection is read from entry[1][0], as before.
    return ''.join(' ' + entry[1][0] for entry in result[0])
36
+
37
+ """
38
+ Keras OCR
39
+ """
40
def ocr_with_keras(img):
    """Run the keras-ocr pipeline on ``img`` and return the recognised text.

    A new ``keras_ocr.pipeline.Pipeline`` is built on every call. ``img`` is
    loaded with ``keras_ocr.tools.read`` and recognised as a batch of one.

    Returns:
        Every recognised word concatenated, each preceded by a single space
        (``''`` when no word is returned).
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # Only one image is submitted, so only the first prediction list matters.
    words_and_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in words_and_boxes)
49
+
50
+ """
51
+ easy OCR
52
+ """
53
+ # gray scale image
54
def get_grayscale(image):
    """Return ``image`` converted to grayscale via ``cv2.COLOR_BGR2GRAY``.

    NOTE(review): the conversion code treats the input as BGR; if callers
    pass RGB arrays the red/blue weights are swapped -- confirm the channel
    order of the images reaching this helper.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
56
+
57
+ # Thresholding or Binarization
58
def thresholding(src):
    """Apply ``cv2.THRESH_TOZERO`` to ``src`` (threshold 127, max value 255).

    Returns the thresholded image, i.e. the second element of the pair
    returned by ``cv2.threshold``.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
60
+
61
def ocr_with_easy(img):
    """Run EasyOCR (Thai + English) on a grayscaled, thresholded ``img``.

    The pre-processed image is written to ``image.png`` in the working
    directory and EasyOCR reads it back from that path.

    Returns:
        The recognised text fragments joined without a separator.
    """
    gray_scale_image = get_grayscale(img)
    # thresholding() returns a new image; the original discarded that return
    # value, so the un-thresholded grayscale image was the one written and
    # OCR'd. Keep the result and use it.
    thresholded_image = thresholding(gray_scale_image)
    cv2.imwrite('image.png', thresholded_image)

    reader = easyocr.Reader(['th', 'en'])
    # The original passed paragraph="False" -- a non-empty string, hence
    # truthy -- so paragraph mode has effectively been on. True keeps that
    # behaviour while making it explicit.
    # NOTE(review): switch to False if paragraph merging was not intended.
    fragments = reader.readtext('image.png', paragraph=True, detail=0)
    return ''.join(fragments)
69
+
70
+ """
71
+ Generate OCR
72
+ """
73
def generate_ocr(Method,input_image):
    """Run the selected OCR engine on ``input_image`` and log the request.

    Args:
        Method: One of ``'EasyOCR'``, ``'KerasOCR'`` or ``'PaddleOCR'``. Any
            other value yields an empty result.
        input_image: The uploaded image as an array, or ``None`` when nothing
            was uploaded.

    Returns:
        The text produced by the chosen engine.

    Raises:
        gr.Error: If no image (or an empty one) was supplied.
    """
    # The original tested ``input_image.any()``, which fails with an
    # AttributeError on None and also rejects a valid all-black image.
    if input_image is None or not input_image.size:
        raise gr.Error("Please upload an image!!!!")

    print("Method___________________",Method)

    engines = {
        'EasyOCR': ocr_with_easy,
        'KerasOCR': ocr_with_keras,
        'PaddleOCR': ocr_with_paddle,
    }
    engine = engines.get(Method)
    text_output = engine(input_image) if engine is not None else ''

    # ``ip_address`` and ``location`` were referenced here but are not defined
    # anywhere in this file, so the logging call raised NameError. Nothing in
    # the file collects them, so record them as unknown.
    ip_address = None
    location = None
    flag(Method,input_image,text_output,ip_address,location)
    return text_output
88
+
89
# ---------------------------------------------------------------------------
# Dataset logging configuration
#
# This section and the helpers below are placed BEFORE the Gradio interface
# is launched: in a plain script ``demo.launch()`` blocks, so anything defined
# after it (originally the whole logging setup and ``flag``) did not exist
# while requests were being served.
# ---------------------------------------------------------------------------

# The access token is read from the HF_TOKEN environment variable only. The
# original additionally called os.environ.get() with what looks like a
# literal access token as the variable *name*; that lookup was overwritten
# immediately afterwards and has been removed.
# NOTE(review): if that value was a real token it is in the repository
# history and should be revoked.
DATASET_NAME = "ocr-image-to-text"

DATASET_REPO_URL = f"https://huggingface.co/datasets/Mo41/{DATASET_NAME}"
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO_ID = "Mo41/ocr-image-to-text"
print("is none?", HF_TOKEN is None)
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR,exist_ok=True)

repo = Repository(
    local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)
repo.git_pull()


# ---------------------------------------------------------------------------
# Save generated details
# ---------------------------------------------------------------------------
def dump_json(thing,file):
    """Serialise ``thing`` as JSON into the file at path ``file`` (UTF-8)."""
    with open(file,'w+',encoding="utf8") as f:
        json.dump(thing,f)


def flag(Method,input_image,text_output,ip_address,location):
    """Store one OCR request locally and upload it to the dataset repo.

    ``image.png`` and ``metadata.jsonl`` are written to a timestamp-named
    folder under ``LOCAL_DIR``, uploaded to ``DATASET_REPO_ID`` under
    ``REPOSITORY_DIR/<timestamp>/``, and the local clone is pulled afterwards.

    Args:
        Method: Name of the OCR engine that produced ``text_output``.
        input_image: Image array; saved via ``PIL.Image.fromarray``.
        text_output: The recognised text.
        ip_address: Stored verbatim under the ``ip_address`` metadata key.
        location: Stored verbatim under the ``loc`` metadata key.

    Returns:
        A status string. Failures are caught and reported in the returned
        string rather than raised.
    """
    try:
        # Imported locally under an alias: at module level
        # ``from datasets import Image`` comes after ``from PIL import Image``
        # and rebinds ``Image``, so the module-level name is not PIL's and
        # ``Image.fromarray`` could not work.
        from PIL import Image as PILImage

        print("saving data------------------------")
        metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
        SAVE_FILE_DIR = os.path.join(LOCAL_DIR,metadata_name)
        os.makedirs(SAVE_FILE_DIR,exist_ok=True)

        image_output_filename = os.path.join(SAVE_FILE_DIR,'image.png')
        try:
            PILImage.fromarray(input_image).save(image_output_filename)
        except Exception as err:
            # Keep the original message but chain the cause for debugging.
            raise Exception("Had issues saving PIL image to file") from err

        # Write metadata.jsonl to file
        json_file_path = os.path.join(SAVE_FILE_DIR,'metadata.jsonl')
        metadata = {'id':metadata_name,'method':Method,
                    'File_name':'image.png','generated_text':text_output,
                    'ip_address': ip_address,'loc': location}
        dump_json(metadata,json_file_path)

        # Upload the image, then the metadata, with the hub's upload_file.
        # Paths inside the repo are joined with '/' explicitly so they do not
        # depend on the local OS path separator.
        uploads = (
            ('image.png', image_output_filename),
            ('metadata.jsonl', json_file_path),
        )
        for file_name, local_path in uploads:
            upload_file(
                path_or_fileobj=local_path,
                path_in_repo='/'.join((REPOSITORY_DIR, metadata_name, file_name)),
                repo_id=DATASET_REPO_ID,
                repo_type='dataset',
                token=HF_TOKEN,
            )

        repo.git_pull()
        return "*****Logs save successfully!!!!"
    except Exception as e:
        return "Error whils saving logs -->"+ str(e)


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
image = gr.Image(shape=(300, 300))
method = gr.Radio(["PaddleOCR","EasyOCR", "KerasOCR"],value="PaddleOCR",elem_id="radio_div")
output = gr.Textbox(label="Output",elem_id="opbox")

demo = gr.Interface(
    generate_ocr,
    [method,image],
    output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: #C0E1F2} #radio_div {background-color: #ADA5EC; font-size: 40px;} #btn {background-color: #94D68B; font-size: 20px;} #opbox {background-color: #ADA5EC;}",
    article="""<p style='text-align: center;'>Feel free to give us your <a href="https://www.pragnakalp.com/contact/" target="_blank">feedback</a> and contact us at
<a href="mailto:[email protected]" target="_blank">[email protected]</a> And don't forget to check out more interesting
<a href="https://www.pragnakalp.com/services/natural-language-processing-services/" target="_blank">NLP services</a> we are offering.</p>
<p style='text-align: center;'>Developed by :<a href="https://www.pragnakalp.com" target="_blank"> Pragnakalp Techlabs</a></p>"""

)

# Launch last so every name the request handler relies on is already defined.
demo.launch()