Spaces:

JabriA
/

OCR

Running

OCR

File size: 1,447 Bytes

84f2a1e
e665f54
b01b23a
 
84f2a1e
e665f54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7732ecc
 
e665f54
7732ecc
 
 
e665f54
 
7732ecc
 
e665f54
 
 
 
b01b23a
 
e665f54
b01b23a
 
 
e665f54
 
b01b23a
 
7732ecc
e665f54
b01b23a
 
 
7732ecc

import os
import tempfile

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load the tokenizer and model once, at import time, so every request reuses them.
_MODEL_ID = 'ucaslcl/GOT-OCR2_0'
_cuda_available = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)

_model_kwargs = dict(
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cuda' if _cuda_available else 'cpu',
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
)
# Switch to evaluation mode straight after loading.
model = AutoModel.from_pretrained(_MODEL_ID, **_model_kwargs).eval()

# NOTE(review): device_map above already targets CUDA when it is available, so
# this explicit move looks redundant -- confirm before removing it.
if _cuda_available:
    model = model.cuda()

# OCR function
def ocr_from_image(image_file, ocr_type: str):
    """Run the OCR model on an uploaded image and return its output.

    The upload is normalised to an RGB JPEG on disk because the model is
    handed a file path rather than an in-memory image.  Each call writes to
    its own uniquely named temporary file: a fixed filename would be shared
    by all concurrent requests, letting one upload overwrite another before
    the model has read it.  The temporary file is removed afterwards.

    Args:
        image_file: Value delivered by the file-upload input; it is passed
            directly to ``Image.open``.  ``None`` means nothing was uploaded.
        ocr_type: OCR mode forwarded to ``model.chat`` as ``ocr_type``.

    Returns:
        Whatever ``model.chat`` returns for the image, or a prompt asking the
        user to upload an image when ``image_file`` is ``None``.
    """
    if image_file is None:
        return "Please upload an image."

    # Reserve a unique path; only the name is needed, so close the descriptor
    # immediately and let PIL reopen the file by path.
    fd, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        # Open the uploaded file with PIL and make sure its handle is closed.
        with Image.open(image_file) as uploaded:
            uploaded.convert("RGB").save(image_path)

        # Pass the path to the model.
        return model.chat(tokenizer, image_path, ocr_type=ocr_type)
    finally:
        # Clean up even if decoding or inference raised.
        os.remove(image_path)

# OCR modes offered to the user; the first entry is the default selection.
ocr_types = ["ocr", "format"]

# Input widgets: the image upload and the OCR mode selector.
_image_input = gr.File(
    label="Upload Image",
    file_types=[".jpg", ".jpeg", ".png"],
)
_mode_input = gr.Radio(ocr_types, label="OCR Type", value="ocr")

# Gradio interface wiring the widgets to the OCR function.
# NOTE(review): the leading "??" in the title looks like a mis-encoded
# character (possibly an emoji) -- confirm the intended text.
iface = gr.Interface(
    fn=ocr_from_image,
    inputs=[_image_input, _mode_input],
    outputs="text",
    title="?? GOT-OCR2.0 Transformer OCR",
    description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`).",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)