JabriA committed on
Commit
b35952c
·
1 Parent(s): b01b23a

Initial commit

Browse files
Files changed (3) hide show
  1. app.py +35 -30
  2. app.py.bak +8 -5
  3. app1.py.bak +54 -0
app.py CHANGED
@@ -1,45 +1,50 @@
 
1
  import gradio as gr
2
- from transformers import AutoModel, AutoTokenizer
3
  from PIL import Image
4
  import torch
5
 
6
- # Load model and tokenizer
7
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
8
- model = AutoModel.from_pretrained(
9
- 'ucaslcl/GOT-OCR2_0',
10
- trust_remote_code=True,
11
- low_cpu_mem_usage=True,
12
- device_map='cuda' if torch.cuda.is_available() else 'cpu',
13
- use_safetensors=True,
14
- pad_token_id=tokenizer.eos_token_id
15
- )
16
- model = model.eval()
17
- if torch.cuda.is_available():
18
- model = model.cuda()
19
-
20
- # OCR function
21
- def ocr_from_image(image, ocr_type):
22
- if image is None:
23
- return "Please upload an image."
24
- image_path = "uploaded_image.jpg"
25
- image.save(image_path)
26
- res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
27
- return res
28
-
29
- # OCR types to choose from
 
 
 
30
  ocr_types = ["ocr", "format"]
31
 
32
- # Gradio interface
33
  iface = gr.Interface(
34
  fn=ocr_from_image,
35
  inputs=[
36
- gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
37
- gr.Radio(ocr_types, label="OCR Type", value="ocr")
38
  ],
39
  outputs="text",
40
- title="🧠 GOT-OCR2.0 Transformer OCR",
41
- description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
42
  )
43
 
 
44
  if __name__ == "__main__":
45
  iface.launch()
 
1
+ import os
2
  import gradio as gr
3
+ from transformers import TrOCRProcessor, TrOCRForConditionalGeneration
4
  from PIL import Image
5
  import torch
6
 
7
+ # 🔄 Chargement du modèle et du processor
8
+ model_name = "microsoft/trocr-base-handwritten"
9
+ model = TrOCRForConditionalGeneration.from_pretrained(model_name)
10
+ processor = TrOCRProcessor.from_pretrained(model_name)
11
+
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+ model.to(device)
14
+ model.eval()
15
+
16
+ # 🧠 Fonction OCR
17
+ def ocr_from_image(image_file, ocr_type):
18
+ if image_file is None:
19
+ return "Veuillez importer une image."
20
+
21
+ # Prétraitement de l'image
22
+ image = Image.open(image_file.name).convert("RGB")
23
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
24
+
25
+ # Génération de texte
26
+ with torch.no_grad():
27
+ generated_ids = model.generate(pixel_values)
28
+
29
+ # Décodage du texte généré
30
+ generated_text = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
31
+ return generated_text
32
+
33
+ # 🔘 Types d’OCR (juste pour l’interface ici)
34
  ocr_types = ["ocr", "format"]
35
 
36
+ # 🎨 Interface Gradio
37
  iface = gr.Interface(
38
  fn=ocr_from_image,
39
  inputs=[
40
+ gr.File(label="Importer une image", file_types=[".jpg", ".jpeg", ".png"]),
41
+ gr.Radio(ocr_types, label="Type d'OCR", value="ocr")
42
  ],
43
  outputs="text",
44
+ title="?? OCR manuscrit avec TrOCR",
45
+ description="Importez une image manuscrite pour extraire le texte avec le modele Microsoft TrOCR."
46
  )
47
 
48
+ # 🚀 Lancement
49
  if __name__ == "__main__":
50
  iface.launch()
app.py.bak CHANGED
@@ -19,23 +19,26 @@ if torch.cuda.is_available():
19
 
20
  # OCR function
21
  def ocr_from_image(image, ocr_type):
22
- image_path = "temp.jpg"
 
 
23
  image.save(image_path)
24
  res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
25
  return res
26
 
27
- # Gradio interface
28
  ocr_types = ["ocr", "format"]
29
 
 
30
  iface = gr.Interface(
31
  fn=ocr_from_image,
32
  inputs=[
33
- gr.Image(type="pil", label="Upload Image"),
34
  gr.Radio(ocr_types, label="OCR Type", value="ocr")
35
  ],
36
  outputs="text",
37
- title="GOT-OCR2.0: OCR with Transformers",
38
- description="Upload an image and select OCR type (plain text or formatted)."
39
  )
40
 
41
  if __name__ == "__main__":
 
19
 
20
  # OCR function
21
  def ocr_from_image(image, ocr_type):
22
+ if image is None:
23
+ return "Please upload an image."
24
+ image_path = "uploaded_image.jpg"
25
  image.save(image_path)
26
  res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
27
  return res
28
 
29
+ # OCR types to choose from
30
  ocr_types = ["ocr", "format"]
31
 
32
+ # Gradio interface
33
  iface = gr.Interface(
34
  fn=ocr_from_image,
35
  inputs=[
36
+ gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
37
  gr.Radio(ocr_types, label="OCR Type", value="ocr")
38
  ],
39
  outputs="text",
40
+ title="🧠 GOT-OCR2.0 Transformer OCR",
41
+ description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
42
  )
43
 
44
  if __name__ == "__main__":
app1.py.bak ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from transformers import TrOCRProcessor, TrOCRForConditionalGeneration
4
+ from PIL import Image
5
+ import torch
6
+
7
+ # 🛡️ Configuration du proxy si nécessaire
8
+ os.environ["HTTP_PROXY"] = "http://meditelproxy.meditel.int:80"
9
+ os.environ["HTTPS_PROXY"] = "http://meditelproxy.meditel.int:80"
10
+
11
+ # 🔄 Chargement du modèle et du processor
12
+ model_name = "microsoft/trocr-base-handwritten"
13
+ model = TrOCRForConditionalGeneration.from_pretrained(model_name)
14
+ processor = TrOCRProcessor.from_pretrained(model_name)
15
+
16
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
+ model.to(device)
18
+ model.eval()
19
+
20
+ # 🧠 Fonction OCR
21
+ def ocr_from_image(image_file, ocr_type):
22
+ if image_file is None:
23
+ return "Veuillez importer une image."
24
+
25
+ # Prétraitement de l'image
26
+ image = Image.open(image_file.name).convert("RGB")
27
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
28
+
29
+ # Génération de texte
30
+ with torch.no_grad():
31
+ generated_ids = model.generate(pixel_values)
32
+
33
+ # Décodage du texte généré
34
+ generated_text = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
35
+ return generated_text
36
+
37
+ # 🔘 Types d’OCR (juste pour l’interface ici)
38
+ ocr_types = ["ocr", "format"]
39
+
40
+ # 🎨 Interface Gradio
41
+ iface = gr.Interface(
42
+ fn=ocr_from_image,
43
+ inputs=[
44
+ gr.File(label="Importer une image", file_types=[".jpg", ".jpeg", ".png"]),
45
+ gr.Radio(ocr_types, label="Type d’OCR", value="ocr")
46
+ ],
47
+ outputs="text",
48
+ title="🧠 OCR manuscrit avec TrOCR",
49
+ description="Importez une image manuscrite pour extraire le texte avec le modèle Microsoft TrOCR."
50
+ )
51
+
52
+ # 🚀 Lancement
53
+ if __name__ == "__main__":
54
+ iface.launch()