Abderrahmane Jabri committed on
Commit
7732ecc
·
1 Parent(s): e665f54

Initial commit3

Browse files
Files changed (2) hide show
  1. app.py +9 -4
  2. app.py.bak +30 -35
app.py CHANGED
@@ -18,11 +18,16 @@ if torch.cuda.is_available():
18
  model = model.cuda()
19
 
20
  # OCR function
21
- def ocr_from_image(image, ocr_type):
22
- if image is None:
23
  return "Please upload an image."
 
 
 
24
  image_path = "uploaded_image.jpg"
25
  image.save(image_path)
 
 
26
  res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
27
  return res
28
 
@@ -37,9 +42,9 @@ iface = gr.Interface(
37
  gr.Radio(ocr_types, label="OCR Type", value="ocr")
38
  ],
39
  outputs="text",
40
- title="🧠 GOT-OCR2.0 Transformer OCR",
41
  description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
42
  )
43
 
44
  if __name__ == "__main__":
45
- iface.launch()
 
18
  model = model.cuda()
19
 
20
  # OCR function
21
+ def ocr_from_image(image_file, ocr_type):
22
+ if image_file is None:
23
  return "Please upload an image."
24
+
25
+ # Ouvrir le fichier image avec PIL
26
+ image = Image.open(image_file).convert("RGB")
27
  image_path = "uploaded_image.jpg"
28
  image.save(image_path)
29
+
30
+ # Passer le chemin au modele
31
  res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
32
  return res
33
 
 
42
  gr.Radio(ocr_types, label="OCR Type", value="ocr")
43
  ],
44
  outputs="text",
45
+ title="?? GOT-OCR2.0 Transformer OCR",
46
  description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
47
  )
48
 
49
  if __name__ == "__main__":
50
+ iface.launch(share=True)
app.py.bak CHANGED
@@ -1,50 +1,45 @@
1
- import os
2
  import gradio as gr
3
- from transformers import TrOCRProcessor, TrOCRForConditionalGeneration
4
  from PIL import Image
5
  import torch
6
 
7
- # ?? Chargement du modele et du processor
8
- model_name = "microsoft/trocr-base-handwritten"
9
- model = TrOCRForConditionalGeneration.from_pretrained(model_name)
10
- processor = TrOCRProcessor.from_pretrained(model_name)
11
-
12
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
- model.to(device)
14
- model.eval()
15
-
16
- # ?? Fonction OCR
17
- def ocr_from_image(image_file, ocr_type):
18
- if image_file is None:
19
- return "Veuillez importer une image."
20
-
21
- # Pretraitement de l'image
22
- image = Image.open(image_file.name).convert("RGB")
23
- pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
24
-
25
- # Generation de texte
26
- with torch.no_grad():
27
- generated_ids = model.generate(pixel_values)
28
-
29
- # Decodage du texte genere
30
- generated_text = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
31
- return generated_text
32
-
33
- # ?? Types d'OCR (juste pour l'interface ici)
34
  ocr_types = ["ocr", "format"]
35
 
36
- # ?? Interface Gradio
37
  iface = gr.Interface(
38
  fn=ocr_from_image,
39
  inputs=[
40
- gr.File(label="Importer une image", file_types=[".jpg", ".jpeg", ".png"]),
41
- gr.Radio(ocr_types, label="Type d'OCR", value="ocr")
42
  ],
43
  outputs="text",
44
- title="?? OCR manuscrit avec TrOCR",
45
- description="Importez une image manuscrite pour extraire le texte avec le modele Microsoft TrOCR."
46
  )
47
 
48
- # ?? Lancement
49
  if __name__ == "__main__":
50
  iface.launch()
 
 
1
  import gradio as gr
2
+ from transformers import AutoModel, AutoTokenizer
3
  from PIL import Image
4
  import torch
5
 
6
+ # Load model and tokenizer
7
+ tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
8
+ model = AutoModel.from_pretrained(
9
+ 'ucaslcl/GOT-OCR2_0',
10
+ trust_remote_code=True,
11
+ low_cpu_mem_usage=True,
12
+ device_map='cuda' if torch.cuda.is_available() else 'cpu',
13
+ use_safetensors=True,
14
+ pad_token_id=tokenizer.eos_token_id
15
+ )
16
+ model = model.eval()
17
+ if torch.cuda.is_available():
18
+ model = model.cuda()
19
+
20
+ # OCR function
21
+ def ocr_from_image(image, ocr_type):
22
+ if image is None:
23
+ return "Please upload an image."
24
+ image_path = "uploaded_image.jpg"
25
+ image.save(image_path)
26
+ res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
27
+ return res
28
+
29
+ # OCR types to choose from
 
 
 
30
  ocr_types = ["ocr", "format"]
31
 
32
+ # Gradio interface
33
  iface = gr.Interface(
34
  fn=ocr_from_image,
35
  inputs=[
36
+ gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
37
+ gr.Radio(ocr_types, label="OCR Type", value="ocr")
38
  ],
39
  outputs="text",
40
+ title="🧠 GOT-OCR2.0 Transformer OCR",
41
+ description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
42
  )
43
 
 
44
  if __name__ == "__main__":
45
  iface.launch()