Sudhanshu Pandey committed
Commit a7b8c18 · Parent(s): b1cb0f5
Update the files

Files changed:
- LICENSE +21 -0
- images/image3.png +0 -0
- images/image4.png +0 -0
- requirements.txt +114 -0
- src/__init__.py +0 -0
- src/models/__init__.py +0 -0
- src/models/__pycache__/__init__.cpython-312.pyc +0 -0
- src/models/__pycache__/table_detector.cpython-312.pyc +0 -0
- src/models/__pycache__/text_recognizer.cpython-312.pyc +0 -0
- src/models/paddleocr_models/det/inference.pdiparams.info +0 -0
- src/models/paddleocr_models/rec/inference.pdiparams.info +0 -0
- src/models/table_detector.py +123 -0
- src/models/text_recognizer.py +115 -0
- src/streamlit_app.py +475 -0
- src/table_creator/__pycache__/data_structures.cpython-312.pyc +0 -0
- src/table_creator/__pycache__/table_extractor.cpython-312.pyc +0 -0
- src/table_creator/data_structures.py +177 -0
- src/table_creator/table_extractor.py +148 -0
- src/table_creator/visualization.py +93 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Sudhanshu Pandey

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
images/image3.png
ADDED
images/image4.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,114 @@
albucore==0.0.13
albumentations==1.4.10
altair==5.5.0
annotated-types==0.7.0
anyio==4.8.0
asgiref==3.8.1
astor==0.8.1
attrs==24.3.0
beautifulsoup4==4.12.3
blinker==1.9.0
cachetools==5.5.0
certifi==2024.12.14
charset-normalizer==3.4.1
click==8.1.8
contourpy==1.3.1
crispy-bootstrap4==2024.10
cycler==0.12.1
Cython==3.0.11
decorator==5.1.1
filelock==3.16.1
filetype==1.2.0
fire==0.7.0
fonttools==4.55.3
fsspec==2024.12.0
gitdb==4.0.12
GitPython==3.1.44
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
hub-sdk==0.0.17
huggingface-hub==0.27.1
idna==3.7
imageio==2.36.1
imgaug==0.4.0
Jinja2==3.1.5
joblib==1.4.2
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kiwisolver==1.4.8
lazy_loader==0.4
lmdb==1.6.2
lxml==5.3.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.10.0
mdurl==0.1.2
mpmath==1.3.0
narwhals==1.22.0
networkx==3.4.2
numpy==1.26.4
opencv-contrib-python==4.10.0.84
opencv-python==4.10.0.84
opencv-python-headless==4.10.0.84
opt-einsum==3.3.0
packaging==24.2
paddleocr==2.9.1
paddlepaddle==2.6.2
pandas==2.2.3
pillow==11.1.0
protobuf==3.20.3
psutil==6.1.1
py-cpuinfo==9.0.0
pyarrow==18.1.0
pybboxes==0.1.6
pyclipper==1.3.0.post6
pydantic==2.10.5
pydantic_core==2.27.2
pydeck==0.9.1
Pygments==2.19.1
pyparsing==3.2.1
python-dateutil==2.9.0.post0
python-docx==1.1.2
python-dotenv==1.0.1
pytz==2024.2
PyYAML==6.0.2
RapidFuzz==3.11.0
referencing==0.35.1
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.9.4
roboflow==1.1.50
rpds-py==0.22.3
sahi==0.11.20
scikit-image==0.25.0
scikit-learn==1.6.1
scipy==1.15.1
seaborn==0.13.2
setuptools==75.8.0
shapely==2.0.6
six==1.17.0
smmap==5.0.2
sniffio==1.3.1
soupsieve==2.6
sqlparse==0.5.3
streamlit==1.41.1
sympy==1.13.1
tenacity==9.0.0
termcolor==2.5.0
terminaltables==3.1.10
thop==0.1.1.post2209072238
threadpoolctl==3.5.0
tifffile==2025.1.10
toml==0.10.2
tomli==2.2.1
torch==2.5.1
torchvision==0.20.1
tornado==6.4.2
tqdm==4.67.1
typing_extensions==4.12.2
tzdata==2024.2
ultralytics==8.3.65
ultralytics-thop==2.0.14
ultralyticsplus==0.1.0
urllib3==2.3.0
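Note: these are exact version pins. They install most reliably into a fresh virtual environment via pip install -r requirements.txt; the paddlepaddle/paddleocr and torch pins in particular tend to be sensitive to pre-existing installs.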
src/__init__.py
ADDED
File without changes
src/models/__init__.py
ADDED
File without changes
src/models/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (170 Bytes)
src/models/__pycache__/table_detector.cpython-312.pyc
ADDED
Binary file (5.76 kB)
src/models/__pycache__/text_recognizer.cpython-312.pyc
ADDED
Binary file (6.64 kB)
src/models/paddleocr_models/det/inference.pdiparams.info
ADDED
Binary file (26.4 kB)
src/models/paddleocr_models/rec/inference.pdiparams.info
ADDED
Binary file (103 kB)
src/models/table_detector.py
ADDED
@@ -0,0 +1,123 @@
from pathlib import Path
from typing import Optional, Union
import numpy as np
from ultralytics import YOLO
# from ultralyticsplus import YOLO


class TableDetector:
    """
    A class for detecting tables in document images using YOLO models.

    Attributes:
        model_path (str): Path to the YOLO model weights
        confidence (float): Confidence threshold for detection
        iou_threshold (float): IoU threshold for NMS
    """

    def __init__(
        self,
        confidence: float = 0.50,
        iou_threshold: float = 0.45
    ) -> None:
        """
        Initialize the TableDetector with its model and parameters.

        Args:
            confidence: Confidence threshold for detection
            iou_threshold: IoU threshold for NMS
        """
        self.model_path = 'src/models/table-detection-and-extraction.pt'
        self.model = YOLO(str(self.model_path))
        self.min_conf = confidence
        self.iou = iou_threshold

    def detect(self, image_path: Union[str, Path]) -> Optional[list]:
        """
        Detect tables in the given image.

        Args:
            image_path: Path to the input image

        Returns:
            List containing the largest detected table box, an empty list if
            no boxes survive merging, or None if prediction yields no results
        """
        results = self.model.predict(str(image_path), verbose=False, iou=self.iou, conf=self.min_conf)
        if results:
            print('boxes:\n', results[0])
            boxes = results[0].boxes.xyxy.numpy()
            cord = self.merge_boxes(boxes)
            print('cords:', cord)
            # Keep only the single largest merged box (by area)
            return [sorted(cord, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]), reverse=True)[0]] if len(cord) > 0 else []
        return None

    def merge_boxes(self, boxes: np.ndarray, overlap_threshold: float = 35) -> np.ndarray:
        """
        Merge overlapping bounding boxes.

        Args:
            boxes: Array of bounding box coordinates
            overlap_threshold: Percentage-overlap threshold for merging boxes

        Returns:
            Array of merged bounding box coordinates
        """
        # Sort boxes by area in descending order
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_indices = np.argsort(-areas)
        boxes = boxes[sorted_indices]

        merged_boxes = []

        for box in boxes:
            if not merged_boxes:
                merged_boxes.append(box)
                continue

            overlap_found = False
            for i, merged_box in enumerate(merged_boxes):
                iou = self._calculate_overlap(box, merged_box)
                if iou > overlap_threshold:
                    # Keep the larger box
                    box_area = (box[2] - box[0]) * (box[3] - box[1])
                    merged_area = (merged_box[2] - merged_box[0]) * (merged_box[3] - merged_box[1])
                    if box_area > merged_area:
                        merged_boxes[i] = box
                    overlap_found = True
                    break

            if not overlap_found:
                merged_boxes.append(box)

        return np.array(merged_boxes).astype(int)

    @staticmethod
    def _calculate_overlap(box1: np.ndarray, box2: np.ndarray) -> float:
        """
        Calculate the percentage overlap between two boxes.

        Args:
            box1: First bounding box coordinates
            box2: Second bounding box coordinates

        Returns:
            Percentage of overlap between the boxes, relative to the smaller box
        """
        x_left = max(box1[0], box2[0])
        y_top = max(box1[1], box2[1])
        x_right = min(box1[2], box2[2])
        y_bottom = min(box1[3], box2[3])

        if x_right < x_left or y_bottom < y_top:
            return 0.0

        intersection_area = (x_right - x_left) * (y_bottom - y_top)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

        min_area = min(box1_area, box2_area)
        if min_area == 0:
            return 0.0

        return (intersection_area / min_area) * 100
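For reference, a minimal usage sketch of this class. The input file name is a placeholder, and the import assumes src/ is on the module search path (as it is when the Streamlit app below runs):

from models.table_detector import TableDetector

detector = TableDetector(confidence=0.5, iou_threshold=0.45)
boxes = detector.detect('sample_invoice.png')  # placeholder image path
if boxes:  # detect() returns at most the single largest merged table box
    x1, y1, x2, y2 = boxes[0]
    print(f'Table found at ({x1}, {y1})-({x2}, {y2})')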
src/models/text_recognizer.py
ADDED
@@ -0,0 +1,115 @@
from pathlib import Path
from typing import List, Optional, Dict, Union
import numpy as np
import pandas as pd
from paddleocr import PaddleOCR
from PIL import Image


class TextRecognizer:
    """
    A class for performing OCR on detected tables using PaddleOCR.

    Attributes:
        models_dir (Path): Directory containing OCR model files
    """

    def __init__(self, models_dir: Optional[Union[str, Path]] = None) -> None:
        """
        Initialize the TextRecognizer with its model directory.

        Args:
            models_dir: Directory containing OCR model files
        """
        self.models_dir = Path(models_dir) if models_dir else Path(__file__).parent / 'paddleocr_models'
        self._setup_model_dirs()

        self.model = PaddleOCR(
            use_angle_cls=False,
            lang='en',
            det_model_dir=str(self.models_dir / 'det'),
            rec_model_dir=str(self.models_dir / 'rec')
        )

    def _setup_model_dirs(self) -> None:
        """Create necessary directories for model files."""
        (self.models_dir / 'det').mkdir(parents=True, exist_ok=True)
        (self.models_dir / 'rec').mkdir(parents=True, exist_ok=True)

    def recognize(
        self,
        image_path: Union[str, Path],
        table_boxes: Optional[np.ndarray] = None,
        padding: tuple = (0, 0)
    ) -> List[pd.DataFrame]:
        """
        Perform OCR on the image within specified table regions.

        Args:
            image_path: Path to the input image
            table_boxes: Array of table bounding box coordinates
            padding: Padding to add around table regions (x, y)

        Returns:
            List of DataFrames containing extracted text and positions
        """
        with Image.open(image_path) as img:
            img_array = np.array(img.convert('RGB'))

        if table_boxes is not None and len(table_boxes) == 1:
            pad_x, pad_y = padding
            box = table_boxes[0]
            # Crop to the single detected table, clamping the top-left corner to the image
            img_array = img_array[
                max(box[1] - pad_y, 0):box[3] + pad_y,
                max(box[0] - pad_x, 0):box[2] + pad_x
            ]

        ocr_result = self.model.ocr(img_array)

        if table_boxes is not None and len(table_boxes) > 1:
            return self._process_multiple_tables(ocr_result[0], table_boxes)
        return self._process_single_table(ocr_result[0])

    def _process_multiple_tables(
        self,
        ocr_data: List,
        table_boxes: np.ndarray
    ) -> List[pd.DataFrame]:
        """Process OCR results for multiple tables."""
        result: Dict[int, List] = {}

        for item in ocr_data:
            bbox = np.array(item[0]).astype(int)
            word = item[1][0]
            bbox = [bbox[:, 0].min(), bbox[:, 1].min(), bbox[:, 0].max(), bbox[:, 1].max()]

            for idx, table_box in enumerate(table_boxes):
                if (bbox[0] >= table_box[0] and bbox[1] >= table_box[1] and
                        bbox[0] <= table_box[2] and bbox[1] <= table_box[3]):
                    if idx not in result:
                        result[idx] = []
                    result[idx].append((word, bbox))

        return [
            pd.DataFrame(
                sorted(table_data, key=lambda x: (x[1][1], x[1][0])),
                columns=['text', 'boundingBox']
            )
            for table_data in result.values()
        ]

    def _process_single_table(self, ocr_data: List) -> List[pd.DataFrame]:
        """Process OCR results for a single table."""
        processed_data = [
            (item[1][0], [
                np.array(item[0])[:, 0].min(),
                np.array(item[0])[:, 1].min(),
                np.array(item[0])[:, 0].max(),
                np.array(item[0])[:, 1].max()
            ])
            for item in ocr_data
        ]

        return [pd.DataFrame(
            sorted(processed_data, key=lambda x: (x[1][1], x[1][0])),
            columns=['text', 'boundingBox']
        )]
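A minimal sketch pairing the detector with this recognizer, mirroring how table_extractor.py wires them together; the image path is a placeholder:

from models.table_detector import TableDetector
from models.text_recognizer import TextRecognizer

detector = TableDetector()
recognizer = TextRecognizer()  # creates det/ and rec/ dirs and loads PaddleOCR

boxes = detector.detect('sample_invoice.png')  # placeholder image path
tables = recognizer.recognize('sample_invoice.png', table_boxes=boxes)
for df in tables:
    print(df.head())  # each DataFrame has 'text' and 'boundingBox' columns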
src/streamlit_app.py
ADDED
@@ -0,0 +1,475 @@
from table_creator.table_extractor import TableExtraction
import streamlit as st
import base64
from PIL import Image
import os
import cv2
import numpy as np
import tempfile
import traceback

# Load models only once
if 'tab_ext' not in st.session_state:
    st.session_state.tab_ext = TableExtraction()
    print('Models loaded.')


def process_image(imgpath):
    return st.session_state.tab_ext.detect(imgpath)


def draw_bounding_box(image, bbox):
    """Draw a bounding box on the image."""
    img_array = np.array(image)
    if len(img_array.shape) == 3:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

    x_min, y_min, x_max, y_max = bbox
    cv2.rectangle(img_array, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    if len(img_array.shape) == 3:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)

    return Image.fromarray(img_array)


# Set page config
st.set_page_config(
    page_title="Table Extraction Tool",
    layout="wide",
    initial_sidebar_state="expanded"  # Expanded so the guide shows by default
)

# Enhanced CSS styling with updated upload section
st.markdown("""
<style>
/* Main container and background */
.main { padding: 1.5rem; }
.stApp {
    background: linear-gradient(135deg, #f6f9fc 0%, #f0f4f8 100%);
}

/* Header styling */
.main-header {
    background: linear-gradient(90deg, #1a365d 0%, #2563eb 100%);
    color: white;
    padding: 2rem 3rem;
    border-radius: 15px;
    margin-bottom: 2rem;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}

.main-header h1 {
    font-size: 2.5rem;
    margin-bottom: 0.5rem;
    font-weight: 600;
    color: white;
}

.main-header p {
    font-size: 1.1rem;
    opacity: 0.9;
}

/* Card containers */
.content-card {
    background-color: white;
    padding: 1.5rem;
    border-radius: 12px;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
    border: 1px solid #e5e7eb;
    margin-bottom: 1.5rem;
}

/* Upload section - reduced size */
.upload-section {
    text-align: center;
    padding: 1rem;
    border: 2px dashed #e5e7eb;
    border-radius: 12px;
    background-color: #f8fafc;
    max-width: 600px;
    margin: 0 auto;
}

.upload-icon {
    font-size: 1.5rem;
    color: #2563eb;
    margin-bottom: 0.5rem;
}

/* Results section */
.results-header {
    font-size: 1.25rem;
    color: #1f2937;
    margin-bottom: 1rem;
    padding-bottom: 0.5rem;
    border-bottom: 2px solid #e5e7eb;
}

/* Download buttons */
.download-button {
    display: inline-block;
    padding: 0.75rem 1.5rem;
    background-color: #2563eb;
    color: white;
    text-decoration: none;
    border-radius: 8px;
    transition: all 0.2s;
    text-align: center;
    width: 100%;
}

.download-button:hover {
    background-color: #1d4ed8;
    box-shadow: 0 4px 6px -1px rgba(37, 99, 235, 0.2);
}

/* Tabs styling */
.stTabs [data-baseweb="tab-list"] {
    gap: 1rem;
    background-color: #f8fafc;
    padding: 0.5rem;
    border-radius: 8px;
}

.stTabs [data-baseweb="tab"] {
    color: #4b5563;
    font-weight: 500;
    padding: 0.5rem 1.5rem;
    border-radius: 6px;
}

.stTabs [data-baseweb="tab"][aria-selected="true"] {
    background-color: #2563eb;
    color: white;
}

/* Guide section styling */
.guide-section {
    background-color: white;
    padding: 2rem;
    border-radius: 12px;
    margin-bottom: 1.5rem;
}

.guide-header {
    color: #1a365d;
    font-size: 1.5rem;
    margin-bottom: 1rem;
    border-bottom: 2px solid #e5e7eb;
    padding-bottom: 0.5rem;
}

.guide-subheader {
    color: #2563eb;
    font-size: 1.2rem;
    margin: 1.5rem 0 0.5rem 0;
}

.guide-text {
    color: #4b5563;
    line-height: 1.6;
    margin-bottom: 1rem;
}

.feature-card {
    background-color: #f8fafc;
    padding: 1rem;
    border-radius: 8px;
    margin-bottom: 1rem;
    border-left: 4px solid #2563eb;
}

.step-container {
    display: flex;
    align-items: flex-start;
    margin-bottom: 1rem;
}

.step-number {
    background-color: #2563eb;
    color: white;
    width: 24px;
    height: 24px;
    border-radius: 12px;
    display: flex;
    align-items: center;
    justify-content: center;
    margin-right: 1rem;
    flex-shrink: 0;
}

.info-icon {
    color: #2563eb;
    margin-right: 0.5rem;
}

.tech-details {
    background-color: #f0f9ff;
    padding: 1rem;
    border-radius: 8px;
    margin: 1rem 0;
}
</style>
""", unsafe_allow_html=True)


# Create sidebar with guide content
with st.sidebar:
    # st.markdown('<div class="guide-section">', unsafe_allow_html=True)
    st.divider()
    st.markdown('<h2 class="guide-header">📚 User Guide</h2>', unsafe_allow_html=True)

    # How It Works section
    st.markdown('<h3 class="guide-subheader">🎯 How It Works</h3>', unsafe_allow_html=True)
    st.markdown("""
    <div class="guide-text">
    This tool uses advanced computer vision and machine learning techniques to:
    <ul>
        <li>Detect and locate tables in document images</li>
        <li>Extract structured data from the detected tables</li>
        <li>Convert the data into easily manageable formats</li>
    </ul>
    </div>
    """, unsafe_allow_html=True)

    # Usage Instructions
    st.markdown('<h3 class="guide-subheader">📝 Usage Instructions</h3>', unsafe_allow_html=True)

    st.markdown("""
    <div class="step-container">
        <div class="step-number">1</div>
        <div class="guide-text">Upload a document image containing a table (PNG, JPG, or JPEG format)</div>
    </div>

    <div class="step-container">
        <div class="step-number">2</div>
        <div class="guide-text">The tool will automatically detect and highlight the table in your image</div>
    </div>

    <div class="step-container">
        <div class="step-number">3</div>
        <div class="guide-text">View both raw and enhanced versions of the extracted data</div>
    </div>

    <div class="step-container">
        <div class="step-number">4</div>
        <div class="guide-text">Download the results in CSV format for further use</div>
    </div>
    """, unsafe_allow_html=True)

    # Best Practices
    st.markdown('<h3 class="guide-subheader">💡 Best Practices</h3>', unsafe_allow_html=True)
    st.markdown("""
    <div class="feature-card">
        <strong>For Best Results:</strong>
        <ul>
            <li>Use clear, high-resolution images</li>
            <li>Ensure tables have well-defined borders</li>
            <li>Avoid skewed or rotated images</li>
            <li>Make sure text is clearly readable</li>
        </ul>
    </div>
    """, unsafe_allow_html=True)

    # Technical Details (collapsible)
    with st.expander("🔧 Technical Details"):
        st.markdown("""
        <div class="tech-details">
            <p><strong>Algorithm Overview:</strong></p>
            <ul>
                <li>Uses computer vision for table boundary detection</li>
                <li>Employs OCR (Optical Character Recognition) for text extraction</li>
                <li>Implements intelligent cell segmentation</li>
                <li>Applies post-processing for enhanced accuracy</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    # Support Info
    st.markdown('<h3 class="guide-subheader">🔗 Connect with Me</h3>', unsafe_allow_html=True)
    st.markdown("""
    <div class="guide-text" style="font-size: 1rem;">
        If you encounter any issues or have questions, feel free to reach out:
        <a href="https://github.com/Sudhanshu1304" target="_blank" style="text-decoration: none;">
            <img src="https://img.icons8.com/ios-filled/20/000000/github.png" alt="GitHub" style="vertical-align: middle; margin-right: 5px;"/>
            GitHub
        </a> |
        <a href="https://www.linkedin.com/in/sudhanshu-pandey-847448193/" target="_blank" style="text-decoration: none;">
            <img src="https://img.icons8.com/ios-filled/20/000000/linkedin.png" alt="LinkedIn" style="vertical-align: middle; margin-right: 5px;"/>
            LinkedIn
        </a> |
        <a href="https://medium.com/@sudhanshu.dpandey" target="_blank" style="text-decoration: none;">
            <img src="https://img.icons8.com/ios-filled/20/000000/medium-logo.png" alt="Medium" style="vertical-align: middle; margin-right: 5px;"/>
            Medium
        </a>
    </div>
    """, unsafe_allow_html=True)


# Initialize session state for expanded view
if 'is_expanded' not in st.session_state:
    st.session_state.is_expanded = False


# Title and description
st.markdown("""
<div class="main-header">
    <h1>📊 Table Extraction Tool</h1>
    <p>Upload an image containing tables and instantly convert them into structured data formats.</p>
</div>
""", unsafe_allow_html=True)


# File upload section - reduced size
# st.markdown('<div class="content-card">', unsafe_allow_html=True)
# st.markdown("""
# <div class="upload-section">
#     <div class="upload-icon">📥</div>
#     <h3 style="font-size: 1.1rem; margin: 0.5rem 0;">Upload Table Image</h3>
#     <p style="font-size: 0.9rem; margin: 0;">Supported formats: PNG, JPG, JPEG</p>
# </div>
# """, unsafe_allow_html=True)
uploaded_file = st.file_uploader("", type=['png', 'jpg', 'jpeg'])
st.markdown('</div>', unsafe_allow_html=True)

# Process the uploaded file
if uploaded_file is not None:
    with st.spinner('🔄 Processing your image...'):
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            temp_path = tmp_file.name

        try:
            image = Image.open(uploaded_file)
            (raw_df, cleaned_df), bbox = process_image(temp_path)

            st.session_state.raw_data = raw_df
            st.session_state.processed_data = cleaned_df
            marked_image = draw_bounding_box(image, bbox[0])
            st.session_state.marked_image = marked_image

            # Side by side layout
            col1, col2 = st.columns([0.4, 0.6])

            with col1:
                # st.markdown('<div class="content-card image-container">', unsafe_allow_html=True)
                st.divider()
                st.markdown('<h3 class="results-header">Detected Table</h3>', unsafe_allow_html=True)
                st.image(marked_image, use_container_width=True)
                st.markdown('</div>', unsafe_allow_html=True)

            with col2:
                # st.markdown('<div class="content-card">', unsafe_allow_html=True)
                st.divider()
                st.markdown('<h3 class="results-header">Extracted Data</h3>', unsafe_allow_html=True)

                # # Toggle button for expanded view
                # if st.button("🔍 Toggle Full View" if not st.session_state.is_expanded else "⬆️ Collapse View"):
                #     st.session_state.is_expanded = not st.session_state.is_expanded

                tabs = st.tabs(["🔍 Raw Data", "✨ Enhanced Data ⭐"])

                with tabs[0]:
                    st.dataframe(st.session_state.raw_data,
                                 use_container_width=True,
                                 height=600 if not st.session_state.is_expanded else None)

                    # Add HTML copy section for raw data
                    st.markdown("### 📋 Copy HTML Table")
                    html_raw = st.session_state.raw_data.to_html(index=False)
                    st.markdown("""
                    <div style="background-color: #f8fafc; padding: 0.5rem; border-radius: 8px; margin-bottom: 0.5rem;">
                        <p style="margin: 0; color: #475569; font-size: 0.9rem;">
                            ℹ️ This HTML can be copied and used directly in websites, LLM prompts, or other applications.
                        </p>
                    </div>
                    """, unsafe_allow_html=True)
                    st.markdown("""
                    <div style="max-height: 150px; overflow-y: auto; border-radius: 8px;">
                    """, unsafe_allow_html=True)
                    st.code(html_raw, language="html")
                    st.markdown("</div>", unsafe_allow_html=True)

                with tabs[1]:
                    st.markdown("""
                    <div style="background-color: #f0f9ff; padding: 1rem; border-radius: 8px; margin-bottom: 1rem;">
                        <p style="margin: 0; color: #1e40af;">
                            ⭐ This is our enhanced version of the table with improved formatting and structure.
                        </p>
                    </div>
                    """, unsafe_allow_html=True)
                    st.dataframe(st.session_state.processed_data,
                                 use_container_width=True,
                                 height=600 if not st.session_state.is_expanded else None)

                    # Add HTML copy section for enhanced data
                    st.markdown("### 📋 Copy HTML Table")
                    html_enhanced = st.session_state.processed_data.to_html(index=False)
                    st.markdown("""
                    <div style="background-color: #f8fafc; padding: 0.5rem; border-radius: 8px; margin-bottom: 0.5rem;">
                        <p style="margin: 0; color: #475569; font-size: 0.9rem;">
                            ℹ️ This HTML can be copied and used directly in websites, LLM prompts, or other applications.
                        </p>
                    </div>
                    """, unsafe_allow_html=True)
                    st.markdown("""
                    <div style="max-height: 150px; overflow-y: auto; border-radius: 8px;">
                    """, unsafe_allow_html=True)
                    st.code(html_enhanced, language="html")
                    st.markdown("</div>", unsafe_allow_html=True)

            # st.markdown('</div>', unsafe_allow_html=True)

            # Download section below both columns
            # st.markdown('<div class="content-card">', unsafe_allow_html=True)
            st.divider()
            st.markdown('<h3 class="results-header">Download Options</h3>', unsafe_allow_html=True)
            download_cols = st.columns([1, 0.1, 1])

            def get_csv_download_link(df, filename):
                csv = df.to_csv(index=False).encode()
                b64 = base64.b64encode(csv).decode()
                return f'<a href="data:file/csv;base64,{b64}" download="{filename}" class="download-button">📥 Download {filename}</a>'

            with download_cols[0]:
                if 'raw_data' in st.session_state:
                    csv = st.session_state.raw_data.to_csv(index=False)
                    st.download_button(
                        label="📥 Download Raw Data",
                        data=csv,
                        file_name="raw_data.csv",
                        mime="text/csv",
                        use_container_width=True,
                        key="raw_download"
                    )

            with download_cols[2]:
                if 'processed_data' in st.session_state:
                    csv = st.session_state.processed_data.to_csv(index=False)
                    st.download_button(
                        label="📥 Download Enhanced Data ⭐",
                        data=csv,
                        file_name="enhanced_data.csv",
                        mime="text/csv",
                        use_container_width=True,
                        key="enhanced_download"
                    )
            st.markdown('</div>', unsafe_allow_html=True)

        except Exception:
            st.error(f"❌ Error processing image: {traceback.format_exc()}")

        finally:
            try:
                os.unlink(temp_path)
            except Exception as e:
                st.warning(f"⚠️ Error removing temporary file: {e}")
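Note: assuming the pinned dependencies are installed, the usual way to launch this app would be streamlit run src/streamlit_app.py from the repository root; both the hard-coded weights path src/models/table-detection-and-extraction.pt and the bare imports (models..., table_creator...) assume src/ is resolvable on the module search path at runtime.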
src/table_creator/__pycache__/data_structures.cpython-312.pyc
ADDED
Binary file (8.74 kB)
src/table_creator/__pycache__/table_extractor.cpython-312.pyc
ADDED
Binary file (9.27 kB)
src/table_creator/data_structures.py
ADDED
@@ -0,0 +1,177 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import pandas as pd
import numpy as np


@dataclass
class TableCell:
    """
    Represents a cell in a table with its value and position.

    Attributes:
        value: The text content of the cell
        bbox: Bounding box coordinates [x1, y1, x2, y2]
        column_name: Name of the column this cell belongs to
    """
    value: str
    bbox: List[int]
    column_name: str


@dataclass
class TableRow:
    """
    Represents a row in a table with its cells and boundaries.

    Attributes:
        cells: Dictionary of column name to TableCell
        min_x: Minimum x coordinate of the row
        max_x: Maximum x coordinate of the row
        min_y: Minimum y coordinate of the row
        max_y: Maximum y coordinate of the row
    """
    cells: Dict[str, TableCell]
    min_x: float
    max_x: float
    min_y: float
    max_y: float


class TableStructure:
    """
    Maintains the structure of a table as an ordered list of rows.
    """

    def __init__(self, debug: bool = False) -> None:
        """
        Initialize the table structure.

        Args:
            debug: Enable debug logging
        """
        self.rows: List[TableRow] = []
        self.debug = debug

    def build_structure(self, dataframes: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        """
        Build table structure from column-wise dataframes.

        Args:
            dataframes: Dictionary of column name to DataFrame containing text and positions

        Returns:
            DataFrame with structured table data
        """
        if not dataframes:
            return pd.DataFrame()

        # Reset any rows left over from a previous build on this instance
        self.rows = []

        # Initialize with first column
        first_col = list(dataframes.keys())[0]
        self._initialize_rows(first_col, dataframes[first_col])

        # Process remaining columns
        for col_name in list(dataframes.keys())[1:]:
            self._process_column(col_name, dataframes[col_name])

        return self._to_dataframe(dataframes.keys())

    def _initialize_rows(self, column_name: str, df: pd.DataFrame) -> None:
        """Initialize rows with the first column's data."""
        for _, row in df.iterrows():
            bbox = row['boundingBox']
            self.rows.append(TableRow(
                cells={column_name: TableCell(row['text'], bbox, column_name)},
                min_x=bbox[0],
                max_x=bbox[2],
                min_y=bbox[1],
                max_y=bbox[3]
            ))

    def _process_column(self, column_name: str, df: pd.DataFrame) -> None:
        """Process additional columns and align with existing rows."""
        search_idx = 0

        for _, row in df.iterrows():
            text = row['text']
            bbox = row['boundingBox']

            matched = False
            for idx, table_row in enumerate(self.rows[search_idx:], search_idx):
                overlap = self._calculate_overlap(
                    bbox,
                    [bbox[0], table_row.min_y, bbox[2], table_row.max_y]
                )

                if overlap > 10:
                    self._update_row(idx, column_name, text, bbox)
                    search_idx = idx + 1
                    matched = True
                    break
                elif bbox[3] <= table_row.min_y:
                    self._insert_row(idx, column_name, text, bbox)
                    search_idx = idx + 1
                    matched = True
                    break

            if not matched and bbox[1] >= self.rows[-1].max_y:
                self._append_row(column_name, text, bbox)

    def _calculate_overlap(self, rect1: List[int], rect2: List[int]) -> float:
        """Calculate percentage overlap between two rectangles."""
        x_left = max(rect1[0], rect2[0])
        y_top = max(rect1[1], rect2[1])
        x_right = min(rect1[2], rect2[2])
        y_bottom = min(rect1[3], rect2[3])

        if x_right < x_left or y_bottom < y_top:
            return 0.0

        intersection = (x_right - x_left) * (y_bottom - y_top)
        min_area = min(
            (rect1[2] - rect1[0]) * (rect1[3] - rect1[1]),
            (rect2[2] - rect2[0]) * (rect2[3] - rect2[1])
        )

        return (intersection / min_area * 100) if min_area > 0 else 0

    def _update_row(self, idx: int, column_name: str, text: str, bbox: List[int]) -> None:
        """Update existing row with new cell data."""
        self.rows[idx].cells[column_name] = TableCell(text, bbox, column_name)
        self.rows[idx].min_x = min(self.rows[idx].min_x, bbox[0])
        self.rows[idx].max_x = max(self.rows[idx].max_x, bbox[2])

    def _insert_row(self, idx: int, column_name: str, text: str, bbox: List[int]) -> None:
        """Insert new row at specified index."""
        self.rows.insert(idx, TableRow(
            cells={column_name: TableCell(text, bbox, column_name)},
            min_x=bbox[0],
            max_x=bbox[2],
            min_y=bbox[1],
            max_y=bbox[3]
        ))

    def _append_row(self, column_name: str, text: str, bbox: List[int]) -> None:
        """Append new row at the end."""
        self.rows.append(TableRow(
            cells={column_name: TableCell(text, bbox, column_name)},
            min_x=bbox[0],
            max_x=bbox[2],
            min_y=bbox[1],
            max_y=bbox[3]
        ))

    def _to_dataframe(self, columns: List[str]) -> pd.DataFrame:
        """Convert table structure to DataFrame."""
        data = []
        for row in self.rows:
            row_data = {
                col: row.cells[col].value if col in row.cells else None
                for col in columns
            }
            row_data.update({
                'row_min_x': row.min_x,
                'row_max_x': row.max_x,
                'row_min_y': row.min_y,
                'row_max_y': row.max_y
            })
            data.append(row_data)

        return pd.DataFrame(data)
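A small illustrative sketch of build_structure(): two column-wise DataFrames of ('text', [x1, y1, x2, y2]) entries are aligned into rows by vertical overlap. The column names and coordinates here are invented for the example:

import pandas as pd
from table_creator.data_structures import TableStructure

name_col = pd.DataFrame(
    [('Alice', [10, 10, 60, 25]), ('Bob', [10, 40, 55, 55])],
    columns=['text', 'boundingBox'],
)
age_col = pd.DataFrame(
    [('30', [100, 12, 120, 27]), ('25', [100, 41, 120, 56])],
    columns=['text', 'boundingBox'],
)

table = TableStructure().build_structure({'name': name_col, 'age': age_col})
print(table[['name', 'age']])  # 'Alice'/'30' and 'Bob'/'25' land on the same rows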
src/table_creator/table_extractor.py
ADDED
@@ -0,0 +1,148 @@
from models.table_detector import TableDetector
from models.text_recognizer import TextRecognizer
from table_creator.data_structures import TableStructure
import pandas as pd
import re


class TableExtraction:
    def __init__(self) -> None:
        self._table_detection = TableDetector()
        self._document_ocr = TextRecognizer()
        self._linklist = TableStructure()

    def _merge_words(self, prev_obj, word, word_bb):
        """Merge the current word with the previous one if they overlap significantly."""
        merged_text = prev_obj[0] + ' ' + word
        merged_bb = [
            prev_obj[1][0], prev_obj[1][1], word_bb[2], word_bb[3]
        ]
        return (merged_text, merged_bb)

    def _assign_to_column(self, word, word_bb, columns, df, debug=False):
        """Assign a word to the correct column based on bounding box overlap."""
        for key, col_bb in columns.items():
            word_bb_temp = [word_bb[0], col_bb[1], word_bb[2], col_bb[3]]
            overlap = self._table_detection._calculate_overlap(word_bb_temp, col_bb)

            if overlap > 10:
                if len(df[key]) > 0:
                    prev_obj = df[key][-1]
                    prev_overlap = self._table_detection._calculate_overlap(
                        prev_obj[1], [prev_obj[1][0], word_bb[1], prev_obj[1][2], word_bb[3]]
                    )
                    if prev_overlap >= 30:
                        word, word_bb = self._merge_words(prev_obj, word, word_bb)
                        df[key][-1] = (word, word_bb)
                    else:
                        df[key].append((word, word_bb))
                else:
                    df[key].append((word, word_bb))
                # Dynamically adjust the column bounding box to fit the new word
                columns[key] = [
                    min(word_bb[0], col_bb[0]), col_bb[1],
                    max(word_bb[2], col_bb[2]), col_bb[3]
                ]
                return True
        return False

    def _get_normalized_bounding_box(self, imgsz: str, bb: list) -> pd.DataFrame:
        # Stub: not implemented yet
        names = ['pdf1', 'sample_pdf2.pdf']
        pass

    def get_words_in_column(self, cords: dict, df_word: pd.DataFrame, merge=True, debug=False):
        """Distribute words into their respective columns based on bounding box coordinates."""
        df = {key: [] for key in cords}
        unknown_columns = {}
        unknown_data = {}

        for index, row in df_word.iterrows():
            word, word_bb = row['text'], list(map(int, row['boundingBox']))
            if debug:
                print(f"\nProcessing word: '{word}'")

            if not self._assign_to_column(word, word_bb, cords, df, debug):
                # Handle words that do not match any known column
                for key, val in unknown_columns.items():
                    overlap = self._table_detection._calculate_overlap(
                        val, [word_bb[0], val[1], word_bb[2], val[3]]
                    )
                    if overlap > 30:
                        prev_obj = unknown_data[key][-1]
                        prev_overlap = self._table_detection._calculate_overlap(
                            prev_obj[1], [prev_obj[1][0], word_bb[1], prev_obj[1][2], word_bb[3]]
                        )
                        if prev_overlap >= 30:
                            word, word_bb = self._merge_words(prev_obj, word, word_bb)
                            unknown_data[key][-1] = (word, word_bb)
                        else:
                            unknown_data[key].append((word, word_bb))
                        break
                else:
                    # Create a new unknown column if no match is found
                    unknown_key = f'{word}__{index}__'
                    unknown_columns[unknown_key] = word_bb
                    unknown_data[unknown_key] = [(word, word_bb)]

        if merge:
            df.update(unknown_data)

        # Convert lists to DataFrames
        df = {key: pd.DataFrame(val, columns=['text', 'boundingBox']) for key, val in df.items()}
        return df, unknown_data, unknown_columns

    def postprocess(self, parsed_df: pd.DataFrame, columns=None):
        """Post-process the parsed DataFrame to merge columns and clean data."""
        try:
            parsed_df = parsed_df.dropna(how='all').reset_index(drop=True)
            new_df = pd.DataFrame()

            # Merge adjacent empty header columns
            empty_columns = parsed_df.columns[parsed_df.iloc[:1].isna().all()].tolist()
            for col in empty_columns[::-1]:
                col_idx = list(parsed_df.columns).index(col)
                if col_idx > 0:
                    parsed_df.iloc[:, col_idx - 1] += ' ' + parsed_df.iloc[:, col_idx]
            parsed_df = parsed_df.drop(columns=empty_columns)

            if not columns:
                return parsed_df

            used_indices = set()
            for header in columns:
                match_indices = [i for i, col in enumerate(parsed_df.columns) if header in col]
                if match_indices:
                    used_indices.update(match_indices)
                    new_df[header] = parsed_df.iloc[:, match_indices].apply(
                        lambda x: ' '.join(x.fillna('').str.strip()), axis=1
                    )

            # Include unused columns
            unused_columns = [col for i, col in enumerate(parsed_df.columns) if i not in used_indices]
            new_df = pd.concat([new_df, parsed_df[unused_columns]], axis=1)

            return new_df
        except Exception as e:
            print(f"Error in postprocess: {e}")
            return parsed_df

    def detect(self, image_path: str):
        """Detect tables in an image and extract their data."""
        cords = self._table_detection.detect(image_path)
        all_table_df = self._document_ocr.recognize(image_path, cords)

        table_data = []
        for table in all_table_df:
            column_data, _, _ = self.get_words_in_column({}, table)
            ordered_columns = sorted(column_data, key=lambda x: column_data[x].iloc[0]['boundingBox'][0])
            dictword = {col: column_data[col] for col in ordered_columns}

            df = self._linklist.build_structure(dictword)
            df = df.loc[:, ordered_columns]
            df = df.rename(columns=lambda col: re.sub(r'__\d+__', '', str(col)).strip())
            df_postp = self.postprocess(df)

            # Assign generic column names
            df.columns = [f"column {i+1}" for i in range(df.shape[1])]
            table_data.append((df, df_postp))

        return table_data[0], cords
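End-to-end, this is the class the Streamlit app drives; a minimal sketch with a placeholder image path:

from table_creator.table_extractor import TableExtraction

extractor = TableExtraction()  # loads the YOLO detector and PaddleOCR once
(raw_df, cleaned_df), boxes = extractor.detect('sample_invoice.png')
print(boxes)              # the single largest detected table box
print(cleaned_df.head())  # the post-processed table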
src/table_creator/visualization.py
ADDED
@@ -0,0 +1,93 @@
from typing import List, Tuple, Union
import cv2
import numpy as np
from PIL import Image


class TableVisualizer:
    """
    Utility class for visualizing detected tables and OCR results.
    """

    @staticmethod
    def draw_boxes(
        image: Union[np.ndarray, Image.Image],
        boxes: List[List[int]],
        color: Tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2
    ) -> Image.Image:
        """
        Draw bounding boxes on an image.

        Args:
            image: Input image
            boxes: List of bounding box coordinates [x1, y1, x2, y2]
            color: RGB color for the boxes
            thickness: Line thickness

        Returns:
            Image with drawn bounding boxes
        """
        if isinstance(image, Image.Image):
            image = np.array(image)

        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        image_copy = image.copy()

        for box in boxes:
            cv2.rectangle(
                image_copy,
                (box[0], box[1]),
                (box[2], box[3]),
                color,
                thickness
            )

        return Image.fromarray(image_copy)

    @staticmethod
    def draw_text_boxes(
        image: Union[np.ndarray, Image.Image],
        text_data: List[Tuple[str, List[int]]],
        color: Tuple[int, int, int] = (255, 0, 0),
        thickness: int = 1
    ) -> Image.Image:
        """
        Draw text boxes with labels on an image.

        Args:
            image: Input image
            text_data: List of (text, bbox) tuples
            color: RGB color for the boxes
            thickness: Line thickness

        Returns:
            Image with drawn text boxes
        """
        if isinstance(image, Image.Image):
            image = np.array(image)

        image_copy = image.copy()

        for text, bbox in text_data:
            cv2.rectangle(
                image_copy,
                (bbox[0], bbox[1]),
                (bbox[2], bbox[3]),
                color,
                thickness
            )
            cv2.putText(
                image_copy,
                text[:20],
                (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                color,
                thickness
            )

        return Image.fromarray(image_copy)
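A minimal sketch of the visualizer on a placeholder image and box list:

from PIL import Image
from table_creator.visualization import TableVisualizer

image = Image.open('sample_invoice.png')  # placeholder input
boxes = [[50, 100, 600, 400]]             # placeholder coordinates
annotated = TableVisualizer.draw_boxes(image, boxes, color=(0, 255, 0))
annotated.save('sample_invoice_annotated.png')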