Spaces:
Sleeping
Sleeping
| from dotenv import load_dotenv | |
| import io | |
| import boto3 | |
| import os | |
| from PIL import ImageFilter | |
| import numpy as np | |
def textract_ocr(image, box):
    """Run AWS Textract OCR on a rectangular region of an image.

    The region given by ``box`` is cropped out of ``image``, converted to
    mode ``"L"``, PNG-encoded in memory and sent to Textract's
    ``detect_document_text`` operation in the ``eu-west-3`` region.

    Args:
        image: Object exposing ``crop``/``convert``/``save`` -- presumably a
            ``PIL.Image.Image``; confirm against callers.
        box: Iterable of exactly four values ``(x1, y1, x2, y2)`` that is
            forwarded unchanged to ``image.crop``.

    Returns:
        The text of every block whose ``BlockType`` is ``'LINE'``, in the
        order the response lists them, each followed by ``"\n"``. An empty
        string when the response contains no such block.
    """
    # Credentials are read from the environment below; load_dotenv() is
    # called first so values kept in a local .env file are visible too.
    load_dotenv()

    x1, y1, x2, y2 = box
    cropped_image = image.crop((x1, y1, x2, y2)).convert("L")

    # Serialise the crop to PNG in memory; the raw bytes are sent inline.
    buffer = io.BytesIO()
    cropped_image.save(buffer, format='PNG')
    img_bytes = buffer.getvalue()

    client = boto3.client(
        'textract',
        region_name='eu-west-3',
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv('aws_secret_access_key'),
    )
    response = client.detect_document_text(Document={'Bytes': img_bytes})

    # Keep only LINE blocks and terminate each recognised line with "\n".
    return "".join(
        block['Text'] + "\n"
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )