Sudhanshu Pandey committed on
Commit
2e79922
·
1 Parent(s): 58f48eb

adding to spaces

Browse files
.gitattributes CHANGED
@@ -37,3 +37,4 @@ src/models/table-detection-and-extraction.pt filter=lfs diff=lfs merge=lfs -text
37
  *.png filter=lfs diff=lfs merge=lfs -text
38
  *.pdparams filter=lfs diff=lfs merge=lfs -text
39
  *.pdmodel filter=lfs diff=lfs merge=lfs -text
 
 
37
  *.png filter=lfs diff=lfs merge=lfs -text
38
  *.pdparams filter=lfs diff=lfs merge=lfs -text
39
  *.pdmodel filter=lfs diff=lfs merge=lfs -text
40
+ *.pdiparams filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *__pycache__
src/streamlit_app.py → app.py RENAMED
@@ -1,4 +1,4 @@
1
- from table_creator.table_extractor import TableExtraction
2
  import streamlit as st
3
  import base64
4
  from PIL import Image
@@ -244,7 +244,7 @@ with st.sidebar:
244
  st.markdown("""
245
  <div class="step-container">
246
  <div class="step-number">1</div>
247
- <div class="guide-text">Upload a document image containing a table (PNG, JPG, or JPEG format)</div>
248
  </div>
249
 
250
  <div class="step-container">
 
1
+ from src.table_creator.table_extractor import TableExtraction
2
  import streamlit as st
3
  import base64
4
  from PIL import Image
 
244
  st.markdown("""
245
  <div class="step-container">
246
  <div class="step-number">1</div>
247
+ <div class="guide-text">Upload a document image containing a table (JPG, or JPEG format)</div>
248
  </div>
249
 
250
  <div class="step-container">
src/models/paddleocr_models/det/inference.pdiparams ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83676ec730627ab4502f401410a4b6a3ce1c0bb98fa249b71db055b6bddae051
3
+ size 2377917
src/models/paddleocr_models/det/inference.pdmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4bfb1b05d9d1d5a760801eaf6d20180ef7e47bcc675fb17d1f3a89da5fef427
3
+ size 1590133
src/models/paddleocr_models/rec/inference.pdiparams ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f64a1ffb70c56b7a25655963ca16f5bf3286202e3f52ac972bee05cdee2f56
3
+ size 7607269
src/models/paddleocr_models/rec/inference.pdmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85b952f05f709af259cfe4254012aa7208bef0998f71f57a15495446f25ccd43
3
+ size 2517366
src/models/table_detector.py CHANGED
@@ -2,7 +2,9 @@ from pathlib import Path
2
  from typing import Optional, Union
3
  import numpy as np
4
  from ultralytics import YOLO
5
- # from ultralyticsplus import YOLO
 
 
6
 
7
 
8
  class TableDetector:
@@ -33,6 +35,14 @@ class TableDetector:
33
  self.min_conf = confidence
34
  self.iou = iou_threshold
35
 
 
 
 
 
 
 
 
 
36
  def detect(self, image_path: Union[str, Path]) -> Optional[np.ndarray]:
37
  """
38
  Detect tables in the given image.
@@ -45,11 +55,10 @@ class TableDetector:
45
  """
46
  results = self.model.predict(str(image_path), verbose=False, iou = self.iou, conf = self.min_conf)
47
  if results:
48
- print('boxes :\n',results[0])
49
  boxes = results[0].boxes.xyxy.numpy()
50
  cord = self.merge_boxes(boxes)
51
- print('cords : ',cord)
52
  return [sorted(cord, key = lambda x : (x[2]-x[0])* (x[3]-x[1]), reverse=True)[0]] if len(cord) > 0 else []
 
53
  return None
54
 
55
  def merge_boxes(self, boxes: np.ndarray, overlap_threshold: float = 35) -> np.ndarray:
 
2
  from typing import Optional, Union
3
  import numpy as np
4
  from ultralytics import YOLO
5
+ from ultralyticsplus import YOLO as YOLO2
6
+
7
+
8
 
9
 
10
  class TableDetector:
 
35
  self.min_conf = confidence
36
  self.iou = iou_threshold
37
 
38
+ def load_model(self):
39
+ model = YOLO2('foduucom/table-detection-and-extraction')
40
+ model.overrides['conf'] = 0.25 # NMS confidence threshold
41
+ model.overrides['iou'] = 0.45 # NMS IoU threshold
42
+ model.overrides['agnostic_nms'] = False # NMS class-agnostic
43
+ model.overrides['max_det'] = 1000
44
+ return model
45
+
46
  def detect(self, image_path: Union[str, Path]) -> Optional[np.ndarray]:
47
  """
48
  Detect tables in the given image.
 
55
  """
56
  results = self.model.predict(str(image_path), verbose=False, iou = self.iou, conf = self.min_conf)
57
  if results:
 
58
  boxes = results[0].boxes.xyxy.numpy()
59
  cord = self.merge_boxes(boxes)
 
60
  return [sorted(cord, key = lambda x : (x[2]-x[0])* (x[3]-x[1]), reverse=True)[0]] if len(cord) > 0 else []
61
+
62
  return None
63
 
64
  def merge_boxes(self, boxes: np.ndarray, overlap_threshold: float = 35) -> np.ndarray:
src/table_creator/__pycache__/data_structures.cpython-312.pyc CHANGED
Binary files a/src/table_creator/__pycache__/data_structures.cpython-312.pyc and b/src/table_creator/__pycache__/data_structures.cpython-312.pyc differ
 
src/table_creator/__pycache__/table_extractor.cpython-312.pyc CHANGED
Binary files a/src/table_creator/__pycache__/table_extractor.cpython-312.pyc and b/src/table_creator/__pycache__/table_extractor.cpython-312.pyc differ
 
src/table_creator/table_extractor.py CHANGED
@@ -1,6 +1,6 @@
1
- from models.table_detector import TableDetector
2
- from models.text_recognizer import TextRecognizer
3
- from table_creator.data_structures import TableStructure
4
  import pandas as pd
5
  import re
6
 
 
1
+ from src.models.table_detector import TableDetector
2
+ from src.models.text_recognizer import TextRecognizer
3
+ from src.table_creator.data_structures import TableStructure
4
  import pandas as pd
5
  import re
6
 
yolo11n.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ebbc80d4a7680d14987a577cd21342b65ecfd94632bd9a8da63ae6417644ee1
3
+ size 5613764