Spaces:
Runtime error
Runtime error
Niv Sardi
commited on
Commit
·
8f69832
1
Parent(s):
ae7097b
augmentation, first pass
Browse filesSigned-off-by: Niv Sardi <[email protected]>
- README.org +7 -1
- python/augment.py +111 -0
- python/imtool.py +53 -7
- python/pipelines.py +79 -0
- python/requirements.txt +10 -3
- python/screenshot.py +2 -6
- python/web.py +3 -3
README.org
CHANGED
@@ -34,7 +34,13 @@ https://github.com/ModelDepot/tfjs-yolo-tiny
|
|
34 |
https://github.com/Hyuto/yolov5-tfjs
|
35 |
|
36 |
** augmentation
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
** proveedores
|
40 |
http://www.bcra.gov.ar/SistemasFinancierosYdePagos/Proveedores-servicios-de-pago-ofrecen-cuentas-de-pago.asp
|
|
|
34 |
https://github.com/Hyuto/yolov5-tfjs
|
35 |
|
36 |
** augmentation
|
37 |
+
There were a lot of augmentation solutions out there; because it had better
|
38 |
+
pipelines and multicore support, we went with:
|
39 |
+
- https://github.com/aleju/imgaug
|
40 |
+
|
41 |
+
but we are leaving the others here for reference:
|
42 |
+
- https://github.com/srp-31/Data-Augmentation-for-Object-Detection-YOLO-
|
43 |
+
- https://github.com/mdbloice/Augmentor
|
44 |
|
45 |
** proveedores
|
46 |
http://www.bcra.gov.ar/SistemasFinancierosYdePagos/Proveedores-servicios-de-pago-ofrecen-cuentas-de-pago.asp
|
python/augment.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import math
|
4 |
+
import random
|
5 |
+
|
6 |
+
from io import BytesIO
|
7 |
+
import numpy as np
|
8 |
+
from cairosvg import svg2png
|
9 |
+
import cv2
|
10 |
+
|
11 |
+
import filetype
|
12 |
+
from filetype.match import image_matchers
|
13 |
+
|
14 |
+
import imgaug as ia
|
15 |
+
from imgaug import augmenters as iaa
|
16 |
+
from imgaug.augmentables.batches import UnnormalizedBatch
|
17 |
+
|
18 |
+
from common import defaults, mkdir
|
19 |
+
import imtool
|
20 |
+
import pipelines
|
21 |
+
|
22 |
+
# Number of logos handed to the augmentation pool in one batch.
BATCH_SIZE = 16

mkdir.make_dirs([defaults.AUGMENTED_IMAGES_PATH, defaults.AUGMENTED_LABELS_PATH])

logo_images = []
background_images = list(os.scandir(defaults.IMAGES_PATH))

stats = {
    'failed': 0,
    'ok': 0
}


def load_logo(path):
    """Load the logo at *path* as a 4-channel image cropped by imtool.remove_white.

    Files matched by filetype's image matchers are read directly with OpenCV;
    anything else is rendered through cairosvg first.

    Raises:
        ValueError: if the file cannot be decoded, is not a multi-channel
            image, or is not larger than 10 pixels on both sides once cropped.
    """
    if filetype.match(path, matchers=image_matchers):
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    else:
        png = svg2png(url=path)
        img = cv2.imdecode(np.asarray(bytearray(png), dtype=np.uint8), cv2.IMREAD_UNCHANGED)

    # cv2.imread / cv2.imdecode signal failure by returning None.
    if img is None:
        raise ValueError('could not decode image')
    if img.ndim < 3:
        raise ValueError(f'very bad dim: {img.ndim}')

    if img.shape[2] == 3:
        img = imtool.add_alpha(img)

    img = imtool.remove_white(img)
    (h, w) = img.shape[:2]
    if w <= 10 or h <= 10:
        raise ValueError(f'logo too small after cropping: {w}x{h}')
    return img


for d in os.scandir(defaults.LOGOS_DATA_PATH):
    if not d.is_file():
        stats['failed'] += 1
        continue

    try:
        logo_images.append(load_logo(d.path))
        # Count a logo as ok only once it passed every check, so that
        # 'ok' + 'failed' equals the number of directory entries.
        stats['ok'] += 1
    except Exception as e:
        stats['failed'] += 1
        print(f'error loading: {d.path}: {e}')

print(stats)

# Consecutive, non-overlapping slices of BATCH_SIZE logos; the last batch
# holds the remainder and may be shorter.
batches = [UnnormalizedBatch(images=logo_images[i:i+BATCH_SIZE])
           for i in range(0, len(logo_images), BATCH_SIZE)]

# The generator below is consumed lazily by the pool, so the "Loading"
# message shows when each unaugmented batch is actually pulled.
pipeline = pipelines.HUGE


def create_generator(lst):
    """Yield the batches of *lst* one by one, logging each hand-over."""
    for b in lst:
        print("Loading next unaugmented batch...")
        yield b


batches_generator = create_generator(batches)

with pipeline.pool(processes=-1, seed=1) as pool:
    batches_aug = pool.imap_batches(batches_generator, output_buffer_size=5)

    print("Requesting next augmented batch...")
    for i, batch_aug in enumerate(batches_aug):
        logos = batch_aug.images_aug
        for j, d in enumerate(background_images):
            background = cv2.imread(d.path)
            if background is None:
                # Directory entry that OpenCV cannot decode (sub-directory,
                # non-image file, ...): skip it instead of aborting the run.
                print(f'couldnt read background {d.path}')
                continue

            img = imtool.remove_white(background)
            basename = d.name.replace('.png', '') + f'.{i}.{j}'

            anotations = []
            # Paste a third of the batch onto each background, starting at
            # a different logo for every background.
            for k in range(len(logos) // 3):
                logo = logos[(j + k) % len(logos)]
                try:
                    img, bb, (w, h) = imtool.mix(img, logo, random.random(), random.random())
                except AssertionError:
                    print(f'couldnt process {i}, {j}')
                    continue
                # imtool.mix returns the top-left corner of the pasted logo;
                # labels carry the normalised box centre, like the other
                # label writers in this project.
                cx = (bb.x + bb.w / 2) / w
                cy = (bb.y + bb.h / 2) / h
                anotations.append(f'0 {cx} {cy} {bb.w/w} {bb.h/h}')

            try:
                cv2.imwrite(f'{defaults.AUGMENTED_IMAGES_PATH}/{basename}.png', img)
                label_path = f"{defaults.AUGMENTED_LABELS_PATH}/{basename}.txt"
                # Overwrite: the image above is overwritten as well, and
                # appending would glue a re-run's labels onto the old ones.
                with open(label_path, 'w') as f:
                    f.write('\n'.join(anotations))
            except Exception:
                print(f'couldnt write image {basename}')

        if i < len(batches)-1:
            print("Requesting next augmented batch...")
|
111 |
+
|
python/imtool.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
import os
|
4 |
import math
|
5 |
import cv2
|
|
|
6 |
from typing import NamedTuple
|
7 |
|
8 |
from entity import Entity
|
@@ -38,16 +39,26 @@ class Centroid(BoundingBox):
|
|
38 |
|
39 |
def read_bounding_boxes(filename):
|
40 |
boxes = []
|
|
|
41 |
with open(filename, 'r') as f:
|
42 |
lines = f.readlines()
|
43 |
for l in lines:
|
44 |
-
(
|
|
|
45 |
if x < 0 or y < 0 or w < 10 or h < 10:
|
46 |
-
print(f"dropping logo, it has inconsistent size: {w}x{h}
|
47 |
continue
|
48 |
boxes.append(BoundingBox(x,y,w,h))
|
49 |
return bco, boxes
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def floor_point(x, y):
|
52 |
return (math.floor(x), math.floor(y))
|
53 |
|
@@ -64,6 +75,39 @@ def cut_logo(im, l):
|
|
64 |
(x, y, w, h) = floor_logo(l)
|
65 |
return im[x:w, y:h]
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def crop(id, fn, logos):
|
68 |
basename = os.path.basename(fn).replace('.png', '')
|
69 |
img_out = f"./data/squares/images"
|
@@ -85,7 +129,7 @@ def crop(id, fn, logos):
|
|
85 |
for x in range(tx):
|
86 |
for y in range(ty):
|
87 |
color = (0,x*(255/tx),y*(255/ty))
|
88 |
-
|
89 |
|
90 |
if tx < 2:
|
91 |
xs = 0
|
@@ -104,6 +148,10 @@ def crop(id, fn, logos):
|
|
104 |
rim = cv2.rectangle(rim, start, end, color, 10)
|
105 |
li = []
|
106 |
for l in logos:
|
|
|
|
|
|
|
|
|
107 |
def intersect():
|
108 |
six = l.x - f.x
|
109 |
siy = l.y - f.y
|
@@ -135,8 +183,6 @@ def crop(id, fn, logos):
|
|
135 |
if p:
|
136 |
li.append(p)
|
137 |
|
138 |
-
c = (255, 0, 0)
|
139 |
-
|
140 |
nim = im[start[1]:end[1], start[0]:end[0]]
|
141 |
rnim = rim[start[1]:end[1], start[0]:end[0]]
|
142 |
img_name =f"{img_out}/{basename}-x{x}y{y}.jpg"
|
@@ -152,7 +198,7 @@ def crop(id, fn, logos):
|
|
152 |
dim = cv2.rectangle(rnim,
|
153 |
floor_point(cx - p.w/2, cy - p.h/2),
|
154 |
floor_point(cx + p.w/2, cy + p.h/2),
|
155 |
-
|
156 |
5)
|
157 |
|
158 |
a = f"{int(id)} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}\n"
|
@@ -160,7 +206,7 @@ def crop(id, fn, logos):
|
|
160 |
print(a)
|
161 |
cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
|
162 |
|
163 |
-
cv2.imwrite(f'{debug_out}/{basename}.debug.png',
|
164 |
|
165 |
if __name__ == '__main__':
|
166 |
i = 0
|
|
|
3 |
import os
|
4 |
import math
|
5 |
import cv2
|
6 |
+
import numpy as np
|
7 |
from typing import NamedTuple
|
8 |
|
9 |
from entity import Entity
|
|
|
39 |
|
40 |
def read_bounding_boxes(filename):
    """Parse a label file made of ``<id> <x> <y> <w> <h>`` lines.

    Fields are separated by any run of whitespace and blank lines are
    ignored. Boxes with a negative position or a side shorter than 10 are
    reported on stdout and dropped.

    Returns:
        A ``(bco, boxes)`` tuple: ``bco`` is the id found on the last parsed
        line (``None`` when the file has no data lines) and ``boxes`` is the
        list of ``BoundingBox`` objects that were kept.

    Raises:
        ValueError: if a non-blank line does not hold exactly five numbers.
    """
    boxes = []
    bco = None
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate empty lines (e.g. a trailing newline at EOF).
                continue
            (b, x, y, w, h) = [float(i) for i in fields]
            bco = b
            if x < 0 or y < 0 or w < 10 or h < 10:
                print(f"dropping logo, it has inconsistent size: {w}x{h}@{x}x{y}")
                continue
            boxes.append(BoundingBox(x, y, w, h))
    return bco, boxes
|
53 |
|
54 |
+
def coord_dict_to_point(c):
    """Format a rect mapping as a label fragment via coord_to_point.

    Args:
        c: mapping with the keys ``'x'``, ``'y'``, ``'width'`` and
           ``'height'``.

    Returns:
        The string produced by ``coord_to_point`` for that rectangle.
    """
    return coord_to_point(c['x'], c['y'], c['width'], c['height'])


def coord_to_point(cx, cy, cw, ch):
    """Return ``"<x> <y> <w> <h>"`` for a rectangle given by corner and size.

    ``x``/``y`` are the rectangle's centre (``cx + cw/2``, ``cy + ch/2``)
    rounded down; the width and height are rounded up.
    """
    x = math.floor(cx + cw/2)
    y = math.floor(cy + ch/2)
    return f"{x} {y} {math.ceil(cw)} {math.ceil(ch)}"
|
61 |
+
|
62 |
def floor_point(x, y):
|
63 |
return (math.floor(x), math.floor(y))
|
64 |
|
|
|
75 |
(x, y, w, h) = floor_logo(l)
|
76 |
return im[x:w, y:h]
|
77 |
|
78 |
+
def add_alpha(img):
    """Return *img* with a fourth channel appended, set to 50 everywhere.

    *img* must split into exactly three channels. The new channel has the
    same shape and dtype as the existing ones.
    """
    # NOTE(review): 50 out of 255 makes the result mostly transparent when it
    # is later alpha-blended -- presumably intentional, confirm.
    blue, green, red = cv2.split(img)
    alpha = np.full(blue.shape, 50, dtype=blue.dtype)
    return cv2.merge((blue, green, red, alpha))
|
82 |
+
|
83 |
+
def remove_white(img):
    """Crop *img* to the bounding box of its dark pixels.

    A pixel counts as dark when its grayscale value is below 128. The crop
    is taken from the original image, so all channels are preserved. When
    the image has no dark pixel at all there is nothing to crop to and
    *img* is returned unchanged.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    # Explicit 8-bit mask: 255 where the pixel is dark, 0 elsewhere.
    dark = (gray < 128).astype(np.uint8) * 255
    coords = cv2.findNonZero(dark)
    if coords is None:
        # No dark pixels: boundingRect would fail on an empty point set.
        return img

    x, y, w, h = cv2.boundingRect(coords)  # minimum spanning bounding box
    return img[y:y+h, x:x+w]
|
91 |
+
|
92 |
+
def mix(a, b, fx, fy):
    """Alpha-blend image *b* onto image *a* at a relative position.

    Args:
        a: background image (3-D array), modified in place. The blend uses
           a 3-channel mask, so it is expected to have 3 colour channels.
        b: foreground image whose 4th channel (index 3) is used as alpha.
        fx, fy: fractions selecting where the top-left corner of *b* goes
           inside the free space ``(a_width - b_width, a_height - b_height)``.

    Returns:
        ``(a, BoundingBox(x, y, b_width, b_height), (a_width, a_height))``
        where ``x``/``y`` is the top-left corner of the pasted region.

    Raises:
        AssertionError: if *b* is not strictly smaller than *a* in both
            width and height.
    """
    (bg_h, bg_w, _) = a.shape
    (fg_h, fg_w, _) = b.shape

    assert bg_w > fg_w
    assert bg_h > fg_h

    left = math.floor(fx * (bg_w - fg_w))
    top = math.floor(fy * (bg_h - fg_h))
    region = (slice(top, top + fg_h), slice(left, left + fg_w))

    # Per-pixel opacity in [0, 1], repeated for each of the colour channels.
    opacity = b[:, :, 3] / 255
    mask = np.dstack((opacity, opacity, opacity))

    a[region] = a[region] * (1 - mask) + b[:, :, :3] * mask

    return a, BoundingBox(left, top, fg_w, fg_h), (bg_w, bg_h)
|
110 |
+
|
111 |
def crop(id, fn, logos):
|
112 |
basename = os.path.basename(fn).replace('.png', '')
|
113 |
img_out = f"./data/squares/images"
|
|
|
129 |
for x in range(tx):
|
130 |
for y in range(ty):
|
131 |
color = (0,x*(255/tx),y*(255/ty))
|
132 |
+
logo_color = (255, 0, 0)
|
133 |
|
134 |
if tx < 2:
|
135 |
xs = 0
|
|
|
148 |
rim = cv2.rectangle(rim, start, end, color, 10)
|
149 |
li = []
|
150 |
for l in logos:
|
151 |
+
rim = cv2.rectangle(rim,
|
152 |
+
floor_point(l.x, l.y),
|
153 |
+
floor_point(l.x + l.w, l.y + l.h),
|
154 |
+
logo_color, 5)
|
155 |
def intersect():
|
156 |
six = l.x - f.x
|
157 |
siy = l.y - f.y
|
|
|
183 |
if p:
|
184 |
li.append(p)
|
185 |
|
|
|
|
|
186 |
nim = im[start[1]:end[1], start[0]:end[0]]
|
187 |
rnim = rim[start[1]:end[1], start[0]:end[0]]
|
188 |
img_name =f"{img_out}/{basename}-x{x}y{y}.jpg"
|
|
|
198 |
dim = cv2.rectangle(rnim,
|
199 |
floor_point(cx - p.w/2, cy - p.h/2),
|
200 |
floor_point(cx + p.w/2, cy + p.h/2),
|
201 |
+
logo_color,
|
202 |
5)
|
203 |
|
204 |
a = f"{int(id)} {cx/TILE_SIZE} {cy/TILE_SIZE} {p.w/TILE_SIZE} {p.h/TILE_SIZE}\n"
|
|
|
206 |
print(a)
|
207 |
cv2.imwrite(f'{debug_out}/{basename}{x}{y}.debug.png', dim)
|
208 |
|
209 |
+
cv2.imwrite(f'{debug_out}/{basename}.debug.png', rim)
|
210 |
|
211 |
if __name__ == '__main__':
|
212 |
i = 0
|
python/pipelines.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import imgaug as ia
|
2 |
+
from imgaug import augmenters as iaa
|
3 |
+
|
4 |
+
# iaa.Sometimes(p, aug) applies `aug` to a fraction p of the images; the
# helper below fixes p at 0.1, i.e. the wrapped augmenter runs on roughly one
# image in ten.
def sometimes(aug):
    return iaa.Sometimes(0.1, aug)


# Augmenters given per_channel=0.5 sample one value per image in 50% of all
# cases and new values per channel in the other cases.

# Steps that are always part of the sequence (each with its own probability).
_base_steps = [
    iaa.Fliplr(0.5),  # mirror horizontally, 50% of images
    iaa.Flipud(0.2),  # mirror vertically, 20% of images
    # crop/pad each side by -5% to +10% of the image height/width
    sometimes(iaa.CropAndPad(
        percent=(-0.05, 0.1),
        pad_mode=ia.ALL,
        pad_cval=(0, 255)
    )),
    sometimes(iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # 80-120% of the size, per axis
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # shift by -20% to +20%, per axis
        rotate=(-45, 45),  # degrees
        shear=(-16, 16),  # degrees
        order=[0, 1],  # nearest-neighbour or bilinear interpolation (fast)
        cval=(0, 255),  # fill value used when the mode is constant
        mode=ia.ALL  # any of the available warping modes
    )),
]

# Weaker effects: only a random subset (0 to 5) is applied per image, since
# running all of them would often be far too strong.
_optional_steps = [
    # convert images into their superpixel representation
    sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))),
    iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),  # local means, kernel size between 2 and 7
        iaa.MedianBlur(k=(3, 11)),  # local medians, kernel size between 3 and 11
    ]),
    iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
    iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
    # detect all edges or directed edges, then blend the result with the
    # original image using a blobby mask
    iaa.SimplexNoiseAlpha(iaa.OneOf([
        iaa.EdgeDetect(alpha=(0.5, 1.0)),
        iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
    ])),
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),  # drop 1% to 10% of the pixels
        iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
    ]),
    iaa.Invert(0.05, per_channel=True),  # invert colour channels
    iaa.Add((-10, 10), per_channel=0.5),  # brightness shift of -10 to +10
    iaa.AddToHueAndSaturation((-20, 20)),
    # brightness change, either over the whole image (sometimes per channel)
    # or over sub-areas
    iaa.OneOf([
        iaa.Multiply((0.5, 1.5), per_channel=0.5),
        iaa.FrequencyNoiseAlpha(
            exponent=(-4, 0),
            first=iaa.Multiply((0.5, 1.5), per_channel=True),
            second=iaa.LinearContrast((0.5, 2.0))
        )
    ]),
    iaa.LinearContrast((0.5, 2.0), per_channel=0.5),  # raise or lower contrast
    iaa.Grayscale(alpha=(0.0, 1.0)),
    # move pixels around locally, with random strengths
    sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)),
    # move parts of the image around
    sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))),
    sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
]

# The complete pipeline; note that the whole sequence is itself wrapped in
# sometimes(), so it only runs on about 10% of the images.
HUGE = sometimes(iaa.Sequential(
    _base_steps + [iaa.SomeOf((0, 5), _optional_steps, random_order=True)],
    random_order=True
))
|
python/requirements.txt
CHANGED
@@ -1,4 +1,11 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
progress==1.6
|
3 |
-
|
4 |
-
requests
|
|
|
|
1 |
+
beautifulsoup4==4.11.1
|
2 |
+
CairoSVG==2.5.2
|
3 |
+
filetype==1.1.0
|
4 |
+
imgaug==0.4.0
|
5 |
+
inotify==0.2.10
|
6 |
+
numpy==1.23.2
|
7 |
+
opencv_python==4.6.0.66
|
8 |
progress==1.6
|
9 |
+
PyYAML==6.0
|
10 |
+
requests==2.27.1
|
11 |
+
selenium==4.4.3
|
python/screenshot.py
CHANGED
@@ -10,16 +10,12 @@ from selenium.webdriver.common.by import By
|
|
10 |
from common import selectors
|
11 |
from entity import Entity
|
12 |
from common import defaults,mkdir
|
|
|
13 |
|
14 |
options = webdriver.FirefoxOptions()
|
15 |
options.add_argument("--headless")
|
16 |
options.add_argument("--window-size=1920x8000")
|
17 |
|
18 |
-
def coord_to_point(c):
|
19 |
-
x = math.floor(c['x'] + c['width']/2)
|
20 |
-
y = math.floor(c['y'] + c['height']/2)
|
21 |
-
return f"{x} {y} {math.ceil(c['width'])} {math.ceil(c['height'])}"
|
22 |
-
|
23 |
driver = webdriver.Firefox(options=options)
|
24 |
def sc_entity(e: Entity):
|
25 |
print(f'screenshoting: {e}')
|
@@ -38,7 +34,7 @@ def sc_entity(e: Entity):
|
|
38 |
logos.extend(driver.find_elements(By.CSS_SELECTOR, selectors.cls_logo) or [])
|
39 |
with open(f"{defaults.LABELS_PATH}/{e.bco}.full.txt", 'w') as f:
|
40 |
for i in logos:
|
41 |
-
f.write(f"{e.id} {
|
42 |
|
43 |
if __name__ == '__main__':
|
44 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|
|
|
10 |
from common import selectors
|
11 |
from entity import Entity
|
12 |
from common import defaults,mkdir
|
13 |
+
from imtool import coord_dict_to_point
|
14 |
|
15 |
options = webdriver.FirefoxOptions()
|
16 |
options.add_argument("--headless")
|
17 |
options.add_argument("--window-size=1920x8000")
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
driver = webdriver.Firefox(options=options)
|
20 |
def sc_entity(e: Entity):
|
21 |
print(f'screenshoting: {e}')
|
|
|
34 |
logos.extend(driver.find_elements(By.CSS_SELECTOR, selectors.cls_logo) or [])
|
35 |
with open(f"{defaults.LABELS_PATH}/{e.bco}.full.txt", 'w') as f:
|
36 |
for i in logos:
|
37 |
+
f.write(f"{e.id} {coord_dict_to_point(i.rect)}\n")
|
38 |
|
39 |
if __name__ == '__main__':
|
40 |
sc_entity(Entity.from_dict({'url': 'http://www.bbva.com.ar', 'bco': 'debug'}))
|
python/web.py
CHANGED
@@ -17,14 +17,14 @@ def get_page(e: Entity):
|
|
17 |
|
18 |
def get_cert(e: Entity):
|
19 |
ssl_url = e.url.split("/")[2]
|
20 |
-
mkdir.make_dirs(defaults.CERTS_PATH)
|
21 |
try:
|
22 |
cert = ssl.get_server_certificate((ssl_url, 443), ca_certs=None)
|
23 |
fn = f"{defaults.CERTS_PATH}/{e.bco}.cert"
|
24 |
with open(fn, 'w') as f:
|
25 |
f.write(cert)
|
26 |
except Exception as err:
|
27 |
-
with open(f"{
|
28 |
f.write(str(err))
|
29 |
return fn
|
30 |
|
@@ -40,7 +40,7 @@ def get_logos(e: Entity, page):
|
|
40 |
logos.extend(soup.select(selectors.id_logo))
|
41 |
logos.extend(soup.select(selectors.cls_logo))
|
42 |
|
43 |
-
mkdir.make_dirs(defaults.LOGOS_DATA_PATH)
|
44 |
|
45 |
i = 0
|
46 |
lfn = []
|
|
|
17 |
|
18 |
def get_cert(e: Entity):
    """Download the TLS certificate of the entity's host and store it on disk.

    The host is the third ``/``-separated component of ``e.url`` and the
    certificate is requested on port 443. On success it is written to
    ``{defaults.CERTS_PATH}/{e.bco}.cert``. On any failure the error text is
    written to ``{defaults.DATA_PATH}/{e.bco}.error.log`` instead.

    Returns:
        The path of the written certificate file, or ``None`` when the
        certificate could not be fetched or saved.
    """
    ssl_url = e.url.split("/")[2]
    mkdir.make_dirs([defaults.CERTS_PATH])
    cert_path = f"{defaults.CERTS_PATH}/{e.bco}.cert"
    try:
        cert = ssl.get_server_certificate((ssl_url, 443), ca_certs=None)
        with open(cert_path, 'w') as f:
            f.write(cert)
    except Exception as err:
        with open(f"{defaults.DATA_PATH}/{e.bco}.error.log", 'w+') as f:
            f.write(str(err))
        # Previously the function fell through to `return fn` with `fn`
        # unbound, raising UnboundLocalError after logging the real error.
        return None
    return cert_path
|
30 |
|
|
|
40 |
logos.extend(soup.select(selectors.id_logo))
|
41 |
logos.extend(soup.select(selectors.cls_logo))
|
42 |
|
43 |
+
mkdir.make_dirs([defaults.LOGOS_DATA_PATH])
|
44 |
|
45 |
i = 0
|
46 |
lfn = []
|