# coding:utf-8
import glob
import csv
import cv2
import os
import numpy as np
from shapely.geometry import Polygon

from IndicPhotoOCR.detection import east_config as cfg
from IndicPhotoOCR.detection import east_utils


def get_images(img_root):
    """Return the paths of all ``*.jpg`` files directly inside *img_root*.

    :param img_root: directory to search (not recursive)
    :return: list of matching file paths, in the order ``glob`` yields them
    """
    files = []
    for ext in ['jpg']:
        files.extend(glob.glob(os.path.join(img_root, '*.{}'.format(ext))))
    return files


def load_annoataion(p):
    """Load quadrilateral text annotations from a comma-separated label file.

    Every non-empty row must start with eight numbers
    ``x1, y1, x2, y2, x3, y3, x4, y4``; the last field of the row is the
    label.  A label of ``'*'`` or ``'###'`` flags the box as "ignore".

    :param p: path of the annotation file
    :return: tuple ``(text_polys, text_tags)`` where ``text_polys`` is a
        float32 array of shape ``(N, 4, 2)`` and ``text_tags`` is a boolean
        array of shape ``(N,)``.  When the file does not exist or holds no
        rows, both arrays are empty (``N == 0``); a 2-tuple is returned in
        that case as well so callers can always unpack the result.
    """
    text_polys = []
    text_tags = []
    if os.path.exists(p):
        with open(p, 'r', encoding='UTF-8') as f:
            for line in csv.reader(f):
                if not line:
                    # csv.reader yields [] for blank lines; nothing to parse.
                    continue
                label = line[-1]
                # strip BOM. \ufeff for python3, \xef\xbb\bf for python2
                line = [i.strip('\ufeff').strip('\xef\xbb\xbf') for i in line]
                x1, y1, x2, y2, x3, y3, x4, y4 = map(float, line[:8])
                text_polys.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                text_tags.append(label == '*' or label == '###')
    # Explicit reshape keeps the (N, 4, 2) layout even when N == 0.
    # Builtin ``bool`` replaces ``np.bool``, which NumPy 1.24 removed.
    polys = np.array(text_polys, dtype=np.float32).reshape(len(text_polys), 4, 2)
    return polys, np.array(text_tags, dtype=bool)


def polygon_area(poly):
    """Return the signed area of a quadrilateral.

    The value is half the sum of ``(x_next - x_cur) * (y_next + y_cur)`` over
    the four edges, so its sign depends on the order in which the vertices
    are listed.

    :param poly: indexable of four ``(x, y)`` points
    :return: signed area as a float
    """
    edge = [
        (poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
        (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
        (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
        (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])
    ]
    return np.sum(edge) / 2.
def check_and_validate_polys(polys, tags, xxx_todo_changeme):
    """Clip polygons to the image and drop or re-order invalid ones.

    Polygons whose absolute signed area (see ``polygon_area``) is below 1 are
    discarded.  Polygons with a positive signed area are re-ordered to
    ``(p0, p3, p2, p1)`` so that all kept polygons share one vertex direction.
    ``polys`` is clipped in place.

    :param polys: array of shape ``(N, 4, 2)`` (or any empty array)
    :param tags: per-polygon "ignore" flags, same length as ``polys``
    :param xxx_todo_changeme: ``(h, w)`` of the image
    :return: tuple ``(validated_polys, validated_tags)``; for empty input the
        inputs are returned unchanged, still as a 2-tuple
    """
    (h, w) = xxx_todo_changeme
    if polys.shape[0] == 0:
        # Always hand back a pair: callers unpack two values.
        return polys, tags
    polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
    polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)

    validated_polys = []
    validated_tags = []
    # Check the vertex direction of each quadrilateral and whether it is valid.
    for poly, tag in zip(polys, tags):
        p_area = polygon_area(poly)
        if abs(p_area) < 1:
            print('invalid poly')
            continue
        if p_area > 0:
            print('poly in wrong direction')
            poly = poly[(0, 3, 2, 1), :]
        validated_polys.append(poly)
        validated_tags.append(tag)
    return np.array(validated_polys), np.array(validated_tags)


def crop_area(im, polys, tags, crop_background=False, max_tries=100):
    """Make a random crop of the image that does not cut through any polygon.

    Crop borders are drawn only from rows/columns not covered by the bounding
    extent of any polygon (a margin of one tenth of the image size is added on
    each side so borders can also fall outside the image before clipping).

    :param im: image array of shape ``(h, w, c)``
    :param polys: array of shape ``(N, 4, 2)``
    :param tags: per-polygon flags, indexable like ``polys``
    :param crop_background: when True, return the first acceptable crop that
        contains no polygon; when False, such crops are rejected
    :param max_tries: maximum number of random crops to try
    :return: ``(im, polys, tags)`` for the crop, with polygon coordinates
        shifted into the crop; the unmodified inputs if no crop was accepted
    """
    h, w, _ = im.shape
    pad_h = h // 10
    pad_w = w // 10
    h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
    w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
    for poly in polys:
        poly = np.round(poly, decimals=0).astype(np.int32)
        minx = np.min(poly[:, 0])
        maxx = np.max(poly[:, 0])
        w_array[minx + pad_w:maxx + pad_w] = 1
        miny = np.min(poly[:, 1])
        maxy = np.max(poly[:, 1])
        h_array[miny + pad_h:maxy + pad_h] = 1
    # Ensure the cropped area does not run across a text box.
    h_axis = np.where(h_array == 0)[0]
    w_axis = np.where(w_array == 0)[0]
    if len(h_axis) == 0 or len(w_axis) == 0:
        return im, polys, tags

    for _ in range(max_tries):
        xx = np.random.choice(w_axis, size=2)
        xmin = np.min(xx) - pad_w
        xmax = np.max(xx) - pad_w
        xmin = np.clip(xmin, 0, w - 1)
        xmax = np.clip(xmax, 0, w - 1)
        yy = np.random.choice(h_axis, size=2)
        ymin = np.min(yy) - pad_h
        ymax = np.max(yy) - pad_h
        ymin = np.clip(ymin, 0, h - 1)
        ymax = np.clip(ymax, 0, h - 1)
        if xmax - xmin < cfg.min_crop_side_ratio * w or ymax - ymin < cfg.min_crop_side_ratio * h:
            # area too small
            continue
        if polys.shape[0] != 0:
            poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
                                & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
            # Keep only polygons with all four vertices inside the crop.
            selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0]
        else:
            selected_polys = []
        if len(selected_polys) == 0:
            # no text in this area
            if crop_background:
                return im[ymin:ymax + 1, xmin:xmax + 1, :], polys[selected_polys], tags[selected_polys]
            continue
        im = im[ymin:ymax + 1, xmin:xmax + 1, :]
        polys = polys[selected_polys]
        tags = tags[selected_polys]
        polys[:, :, 0] -= xmin
        polys[:, :, 1] -= ymin
        return im, polys, tags

    return im, polys, tags


def _shrink_p0p1_p2p3(poly, r, ratio):
    """Move the end points of edges (p0, p1) and (p3, p2) inwards, in place."""
    ## p0, p1
    theta = np.arctan2((poly[1][1] - poly[0][1]), (poly[1][0] - poly[0][0]))
    poly[0][0] += ratio * r[0] * np.cos(theta)
    poly[0][1] += ratio * r[0] * np.sin(theta)
    poly[1][0] -= ratio * r[1] * np.cos(theta)
    poly[1][1] -= ratio * r[1] * np.sin(theta)
    ## p2, p3
    theta = np.arctan2((poly[2][1] - poly[3][1]), (poly[2][0] - poly[3][0]))
    poly[3][0] += ratio * r[3] * np.cos(theta)
    poly[3][1] += ratio * r[3] * np.sin(theta)
    poly[2][0] -= ratio * r[2] * np.cos(theta)
    poly[2][1] -= ratio * r[2] * np.sin(theta)


def _shrink_p0p3_p1p2(poly, r, ratio):
    """Move the end points of edges (p0, p3) and (p1, p2) inwards, in place."""
    ## p0, p3
    theta = np.arctan2((poly[3][0] - poly[0][0]), (poly[3][1] - poly[0][1]))
    poly[0][0] += ratio * r[0] * np.sin(theta)
    poly[0][1] += ratio * r[0] * np.cos(theta)
    poly[3][0] -= ratio * r[3] * np.sin(theta)
    poly[3][1] -= ratio * r[3] * np.cos(theta)
    ## p1, p2
    theta = np.arctan2((poly[2][0] - poly[1][0]), (poly[2][1] - poly[1][1]))
    poly[1][0] += ratio * r[1] * np.sin(theta)
    poly[1][1] += ratio * r[1] * np.cos(theta)
    poly[2][0] -= ratio * r[2] * np.sin(theta)
    poly[2][1] -= ratio * r[2] * np.cos(theta)


def shrink_poly(poly, r):
    """Fit a smaller quadrilateral inside *poly*; used for the score map.

    Each vertex is moved along its two adjacent edges by ``0.3 * r[i]``.  The
    longer pair of opposite edges is processed first.  ``poly`` is modified
    in place (pass a copy to keep the original) and also returned.

    :param poly: array of shape ``(4, 2)``, the text polygon
    :param r: four per-vertex reference lengths
    :return: the shrunk polygon (same object as ``poly``)
    """
    # shrink ratio
    R = 0.3
    # find the longer pair of opposite edges
    if np.linalg.norm(poly[0] - poly[1]) + np.linalg.norm(poly[2] - poly[3]) > \
            np.linalg.norm(poly[0] - poly[3]) + np.linalg.norm(poly[1] - poly[2]):
        # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
        _shrink_p0p1_p2p3(poly, r, R)
        _shrink_p0p3_p1p2(poly, r, R)
    else:
        # first move (p0, p3), (p1, p2), then (p0, p1), (p2, p3)
        _shrink_p0p3_p1p2(poly, r, R)
        _shrink_p0p1_p2p3(poly, r, R)
    return poly


def point_dist_to_line(p1, p2, p3):
    """Return the distance from point *p3* to the line through *p1* and *p2*.

    The distance is obtained as ``2 * area / base`` with the triangle area
    from Heron's formula.  When *p1* and *p2* are closer than 1.0 the line is
    treated as degenerate and the mean of the distances from *p3* to *p2* and
    to *p1* is returned instead.
    """
    a = np.linalg.norm(p1 - p2)
    b = np.linalg.norm(p2 - p3)
    c = np.linalg.norm(p3 - p1)
    s = (a + b + c) / 2.0
    # abs() guards against a tiny negative product caused by rounding.
    area = np.abs((s * (s - a) * (s - b) * (s - c))) ** 0.5
    if a < 1.0:
        return (b + c) / 2.0
    return 2 * area / a


def fit_line(p1, p2):
    """Fit a line ``a*x + b*y + c = 0`` through two points.

    :param p1: the two x coordinates ``[x_a, x_b]``
    :param p2: the two y coordinates ``[y_a, y_b]``
    :return: ``[1., 0., -x]`` for a vertical line (equal x coordinates),
        otherwise ``[k, -1., b]`` with slope ``k`` and intercept ``b``
    """
    if p1[0] == p1[1]:
        return [1., 0., -p1[0]]
    [k, b] = np.polyfit(p1, p2, deg=1)
    return [k, -1., b]


def line_cross_point(line1, line2):
    """Return the intersection of two lines given as ``[a, b, c]``.

    Lines use the encoding produced by ``fit_line``: vertical lines are
    ``[1, 0, -x]`` and all others ``[k, -1, b]``.

    :return: float32 array ``[x, y]``, or ``None`` (after printing a message)
        when the lines are parallel
    """
    vertical1 = line1[1] == 0
    vertical2 = line2[1] == 0
    # Two lines are parallel when both are vertical, or when neither is and
    # the slopes match.  The slope of a vertical line must not be compared:
    # its first coefficient is the constant 1, not a slope.
    if vertical1 and vertical2:
        print('Cross point does not exist')
        return None
    if not vertical1 and not vertical2 and line1[0] == line2[0]:
        print('Cross point does not exist')
        return None

    if vertical1:
        x = -line1[2]
        y = line2[0] * x + line2[2]
    elif vertical2:
        x = -line2[2]
        y = line1[0] * x + line1[2]
    else:
        k1, _, b1 = line1
        k2, _, b2 = line2
        x = -(b1 - b2) / (k1 - k2)
        y = k1 * x + b1
    return np.array([x, y], dtype=np.float32)


def line_verticle(line, point):
    """Return the line perpendicular to *line* that passes through *point*.

    :param line: ``[a, b, c]`` as produced by ``fit_line``
    :param point: ``(x, y)``
    :return: ``[a, b, c]`` of the perpendicular line
    """
    if line[1] == 0:
        # Input is vertical, so the perpendicular is horizontal.
        verticle = [0, -1, point[1]]
    else:
        if line[0] == 0:
            # Input is horizontal, so the perpendicular is vertical.
            verticle = [1, 0, -point[0]]
        else:
            verticle = [-1. / line[0], -1, point[1] - (-1 / line[0] * point[0])]
    return verticle


def _foot_of_perpendicular(pa, pb, pt):
    """Return the foot of the perpendicular dropped from *pt* onto line pa-pb."""
    line = fit_line([pa[0], pb[0]], [pa[1], pb[1]])
    return line_cross_point(line, line_verticle(line, pt))


def rectangle_from_parallelogram(poly):
    """Fit a rectangle from a parallelogram.

    Two opposite vertices of the parallelogram are kept; the other two are
    replaced by the feet of perpendiculars dropped from the kept vertices
    onto the opposite sides.  Which pair is kept depends on whether the angle
    at ``p0`` is below 90 degrees and on which side adjacent to ``p0`` is
    longer.

    :param poly: four points ``p0..p3``
    :return: float32 array of shape ``(4, 2)``
    """
    p0, p1, p2, p3 = poly
    angle_p0 = np.arccos(np.dot(p1 - p0, p3 - p0) / (np.linalg.norm(p0 - p1) * np.linalg.norm(p3 - p0)))
    p0p1_is_longer = np.linalg.norm(p0 - p1) > np.linalg.norm(p0 - p3)
    if angle_p0 < 0.5 * np.pi:
        # keep p0 and p2
        if p0p1_is_longer:
            new_p3 = _foot_of_perpendicular(p2, p3, p0)
            new_p1 = _foot_of_perpendicular(p0, p1, p2)
        else:
            new_p1 = _foot_of_perpendicular(p1, p2, p0)
            new_p3 = _foot_of_perpendicular(p0, p3, p2)
        return np.array([p0, new_p1, p2, new_p3], dtype=np.float32)
    # keep p1 and p3
    if p0p1_is_longer:
        new_p2 = _foot_of_perpendicular(p2, p3, p1)
        new_p0 = _foot_of_perpendicular(p0, p1, p3)
    else:
        new_p0 = _foot_of_perpendicular(p0, p3, p1)
        new_p2 = _foot_of_perpendicular(p1, p2, p3)
    return np.array([new_p0, p1, new_p2, p3], dtype=np.float32)


def sort_rectangle(poly):
    """Re-order the corners of a rectangle and return its rotation angle.

    Points in *poly* are expected to be listed clockwise (per the original
    author's note).

    :param poly: array of shape ``(4, 2)``
    :return: ``(sorted_poly, angle)``; ``angle`` is ``0.`` when exactly two
        vertices share the largest y value (bottom edge parallel to the
        x-axis), otherwise an angle in radians derived from the edge between
        the lowest point and the point listed just before it
    """
    # First find the lowest point (largest y).
    p_lowest = np.argmax(poly[:, 1])
    if np.count_nonzero(poly[:, 1] == poly[p_lowest, 1]) == 2:
        # The bottom edge is parallel to the x-axis, so p0 is the upper-left
        # corner (smallest x + y).
        p0_index = np.argmin(np.sum(poly, axis=1))
        p1_index = (p0_index + 1) % 4
        p2_index = (p0_index + 2) % 4
        p3_index = (p0_index + 3) % 4
        return poly[[p0_index, p1_index, p2_index, p3_index]], 0.

    # Find the point that sits to the right of the lowest point.
    p_lowest_right = (p_lowest - 1) % 4
    angle = np.arctan(
        -(poly[p_lowest][1] - poly[p_lowest_right][1]) / (poly[p_lowest][0] - poly[p_lowest_right][0]))
    # The original author expected angle > 0; report it when that is violated.
    if angle <= 0:
        print(angle, poly[p_lowest], poly[p_lowest_right])
    if angle / np.pi * 180 > 45:
        # The lowest point is p2.
        p2_index = p_lowest
        p1_index = (p2_index - 1) % 4
        p0_index = (p2_index - 2) % 4
        p3_index = (p2_index + 1) % 4
        return poly[[p0_index, p1_index, p2_index, p3_index]], -(np.pi / 2 - angle)
    # The lowest point is p3.
    p3_index = p_lowest
    p0_index = (p3_index + 1) % 4
    p1_index = (p3_index + 2) % 4
    p2_index = (p3_index + 3) % 4
    return poly[[p0_index, p1_index, p2_index, p3_index]], angle


def restore_rectangle_rbox(origin, geometry):
    """Rebuild rectangle corners from points and RBOX geometry.

    :param origin: array of shape ``(N, 2)`` with ``(x, y)`` points
    :param geometry: array of shape ``(N, 5)``; columns 0-3 are the four
        distances written by ``generate_rbox`` (top, right, down, left) and
        column 4 is the angle
    :return: array of shape ``(N, 4, 2)``; rows with ``angle >= 0`` come
        first, followed by rows with ``angle < 0`` (input order is not kept
        across the two groups)
    """
    d = geometry[:, :4]
    angle = geometry[:, 4]
    # for angle >= 0
    origin_0 = origin[angle >= 0]
    d_0 = d[angle >= 0]
    angle_0 = angle[angle >= 0]
    if origin_0.shape[0] > 0:
        # Four corners plus the query point, in the un-rotated box frame.
        p = np.array([np.zeros(d_0.shape[0]), -d_0[:, 0] - d_0[:, 2],
                      d_0[:, 1] + d_0[:, 3], -d_0[:, 0] - d_0[:, 2],
                      d_0[:, 1] + d_0[:, 3], np.zeros(d_0.shape[0]),
                      np.zeros(d_0.shape[0]), np.zeros(d_0.shape[0]),
                      d_0[:, 3], -d_0[:, 2]])
        p = p.transpose((1, 0)).reshape((-1, 5, 2))  # N*5*2

        rotate_matrix_x = np.array([np.cos(angle_0), np.sin(angle_0)]).transpose((1, 0))
        rotate_matrix_x = np.repeat(rotate_matrix_x, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))  # N*5*2

        rotate_matrix_y = np.array([-np.sin(angle_0), np.cos(angle_0)]).transpose((1, 0))
        rotate_matrix_y = np.repeat(rotate_matrix_y, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))

        p_rotate_x = np.sum(rotate_matrix_x * p, axis=2)[:, :, np.newaxis]  # N*5*1
        p_rotate_y = np.sum(rotate_matrix_y * p, axis=2)[:, :, np.newaxis]  # N*5*1

        p_rotate = np.concatenate([p_rotate_x, p_rotate_y], axis=2)  # N*5*2

        # Translate so that the fifth (query) point lands on ``origin``.
        p3_in_origin = origin_0 - p_rotate[:, 4, :]
        new_p0 = p_rotate[:, 0, :] + p3_in_origin  # N*2
        new_p1 = p_rotate[:, 1, :] + p3_in_origin
        new_p2 = p_rotate[:, 2, :] + p3_in_origin
        new_p3 = p_rotate[:, 3, :] + p3_in_origin

        new_p_0 = np.concatenate([new_p0[:, np.newaxis, :], new_p1[:, np.newaxis, :],
                                  new_p2[:, np.newaxis, :], new_p3[:, np.newaxis, :]], axis=1)  # N*4*2
    else:
        new_p_0 = np.zeros((0, 4, 2))
    # for angle < 0
    origin_1 = origin[angle < 0]
    d_1 = d[angle < 0]
    angle_1 = angle[angle < 0]
    if origin_1.shape[0] > 0:
        p = np.array([-d_1[:, 1] - d_1[:, 3], -d_1[:, 0] - d_1[:, 2],
                      np.zeros(d_1.shape[0]), -d_1[:, 0] - d_1[:, 2],
                      np.zeros(d_1.shape[0]), np.zeros(d_1.shape[0]),
                      -d_1[:, 1] - d_1[:, 3], np.zeros(d_1.shape[0]),
                      -d_1[:, 1], -d_1[:, 2]])
        p = p.transpose((1, 0)).reshape((-1, 5, 2))  # N*5*2

        rotate_matrix_x = np.array([np.cos(-angle_1), -np.sin(-angle_1)]).transpose((1, 0))
        rotate_matrix_x = np.repeat(rotate_matrix_x, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))  # N*5*2

        rotate_matrix_y = np.array([np.sin(-angle_1), np.cos(-angle_1)]).transpose((1, 0))
        rotate_matrix_y = np.repeat(rotate_matrix_y, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))

        p_rotate_x = np.sum(rotate_matrix_x * p, axis=2)[:, :, np.newaxis]  # N*5*1
        p_rotate_y = np.sum(rotate_matrix_y * p, axis=2)[:, :, np.newaxis]  # N*5*1

        p_rotate = np.concatenate([p_rotate_x, p_rotate_y], axis=2)  # N*5*2

        p3_in_origin = origin_1 - p_rotate[:, 4, :]
        new_p0 = p_rotate[:, 0, :] + p3_in_origin  # N*2
        new_p1 = p_rotate[:, 1, :] + p3_in_origin
        new_p2 = p_rotate[:, 2, :] + p3_in_origin
        new_p3 = p_rotate[:, 3, :] + p3_in_origin

        new_p_1 = np.concatenate([new_p0[:, np.newaxis, :], new_p1[:, np.newaxis, :],
                                  new_p2[:, np.newaxis, :], new_p3[:, np.newaxis, :]], axis=1)  # N*4*2
    else:
        new_p_1 = np.zeros((0, 4, 2))
    return np.concatenate([new_p_0, new_p_1])


def restore_rectangle(origin, geometry):
    """Alias of ``restore_rectangle_rbox``; see that function."""
    return restore_rectangle_rbox(origin, geometry)


def _parallel_line_through(line, point):
    """Return the line parallel to *line* (``[a, b, c]``) passing through *point*."""
    if line[1] == 0:
        return [1, 0, -point[0]]
    return [line[0], -1, point[1] - line[0] * point[0]]


def generate_rbox(im_size, polys, tags):
    """Build the score map, RBOX geometry map and training mask for an image.

    For each polygon the shrunk polygon is painted into the score map, the
    minimum-area enclosing parallelogram is converted into a rectangle, and
    for every pixel of the shrunk polygon the distances to the four rectangle
    sides plus the rotation angle are written into the geometry map.

    :param im_size: ``(h, w)`` of the maps to create
    :param polys: array of shape ``(N, 4, 2)``
    :param tags: per-polygon "ignore" flags
    :return: ``(score_map, geo_map, training_mask)`` with shapes ``(h, w)``
        uint8, ``(h, w, 5)`` float32 and ``(h, w)`` uint8
    """
    h, w = im_size
    # int32 so polygon ids above 255 stay distinct (uint8 would saturate and
    # later polygons would never receive geometry values).
    poly_mask = np.zeros((h, w), dtype=np.int32)
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)
    # mask used during training, to ignore some hard areas (e.g. tiny text)
    training_mask = np.ones((h, w), dtype=np.uint8)
    for poly_idx, (poly, tag) in enumerate(zip(polys, tags)):
        # For each vertex, take the shorter of its two adjacent edges.
        r = [None, None, None, None]
        for i in range(4):
            r[i] = min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                       np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
        # score map: shrink the box and fill the matching score-map region
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
        # If the poly is too small, or is flagged as "ignore" (label * or ###
        # in the annotation file), mask it out during training.
        poly_h = min(np.linalg.norm(poly[0] - poly[3]), np.linalg.norm(poly[1] - poly[2]))
        poly_w = min(np.linalg.norm(poly[0] - poly[1]), np.linalg.norm(poly[2] - poly[3]))
        if min(poly_h, poly_w) < cfg.min_text_size:
            cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
        if tag:
            cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)

        # Pixels that belong to the polygon just painted.
        xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
        # Generate candidate parallelograms: two for each polygon edge.
        fitted_parallelograms = []
        for i in range(4):
            # Take the edge p0-p1 and build two parallelograms from it.
            p0 = poly[i]
            p1 = poly[(i + 1) % 4]
            p2 = poly[(i + 2) % 4]
            p3 = poly[(i + 3) % 4]
            # Fit a*x + b*y + c = 0 for the edge and its two neighbours.
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])
            # The line parallel to ``edge`` goes through whichever of p2 / p3
            # lies farther from ``edge``.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                edge_opposite = _parallel_line_through(edge, p2)
            else:
                edge_opposite = _parallel_line_through(edge, p3)

            # move forward edge
            new_p1 = p1
            new_p2 = line_cross_point(forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                # across p0
                forward_opposite = _parallel_line_through(forward_edge, p0)
            else:
                # across p3
                forward_opposite = _parallel_line_through(forward_edge, p3)
            new_p0 = line_cross_point(forward_opposite, edge)
            new_p3 = line_cross_point(forward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])

            # or move backward edge
            new_p0 = p0
            new_p3 = line_cross_point(backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                # across p1
                backward_opposite = _parallel_line_through(backward_edge, p1)
            else:
                # across p2
                backward_opposite = _parallel_line_through(backward_edge, p2)
            new_p1 = line_cross_point(backward_opposite, edge)
            new_p2 = line_cross_point(backward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])

        # Pick the parallelogram with the smallest area.
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)
        # Rotate the vertex order so the point with the smallest x + y is first.
        parallelogram_coord_sum = np.sum(parallelogram, axis=1)
        min_coord_idx = np.argmin(parallelogram_coord_sum)
        parallelogram = parallelogram[
            [min_coord_idx, (min_coord_idx + 1) % 4, (min_coord_idx + 2) % 4, (min_coord_idx + 3) % 4]]

        # Get the enclosing rectangle and its rotation angle.
        rectange = rectangle_from_parallelogram(parallelogram)
        rectange, rotate_angle = sort_rectangle(rectange)

        p0_rect, p1_rect, p2_rect, p3_rect = rectange
        # For every pixel of this polygon, store its distances to the four
        # rectangle sides and the rotation angle.
        for y, x in xy_in_poly:
            point = np.array([x, y], dtype=np.float32)
            # top
            geo_map[y, x, 0] = point_dist_to_line(p0_rect, p1_rect, point)
            # right
            geo_map[y, x, 1] = point_dist_to_line(p1_rect, p2_rect, point)
            # down
            geo_map[y, x, 2] = point_dist_to_line(p2_rect, p3_rect, point)
            # left
            geo_map[y, x, 3] = point_dist_to_line(p3_rect, p0_rect, point)
            # angle
            geo_map[y, x, 4] = rotate_angle
    return score_map, geo_map, training_mask


def generator(index,
              input_size=512,
              background_ratio=3. / 8,  # probability of a pure-background sample
              random_scale=np.array([0.5, 1, 2.0, 3.0]),  # candidate scale factors
              image_list=None):
    """Produce one training sample for the image at ``image_list[index]``.

    The image is randomly scaled, randomly cropped (as pure background with
    probability ``background_ratio``, otherwise around text), padded to a
    square, resized to ``input_size`` and paired with its label maps.

    :param index: position of the image in ``image_list``
    :param input_size: side length of the returned square image
    :param background_ratio: probability of requesting a background-only crop
    :param random_scale: scale factors to choose from
    :param image_list: sequence of image paths; the annotation file is the
        image path with its extension replaced by ``.txt``
    :return: ``(image, image_path, score_map, geo_map, training_mask)`` where
        the image has its channel order reversed and is float32, and the
        three maps are float32 and sub-sampled by 4 in both directions; five
        ``None`` values if the sample cannot be produced (unreadable image,
        missing annotation, unsuitable crop, or any exception, whose
        traceback is printed)
    """
    try:
        im_fn = image_list[index]
        im = cv2.imread(im_fn)
        if im is None:
            print("can't find image")
            return None, None, None, None, None
        h, w, _ = im.shape
        # Swap only the real extension; a plain str.replace would also rewrite
        # matching text elsewhere in the path and breaks on multi-dot names.
        txt_fn = os.path.splitext(im_fn)[0] + '.txt'
        if not os.path.exists(txt_fn):
            print('text file {} does not exists'.format(txt_fn))
            return None, None, None, None, None
        # Load the annotated boxes.
        text_polys, text_tags = load_annoataion(txt_fn)
        text_polys, text_tags = check_and_validate_polys(text_polys, text_tags, (h, w))

        # Randomly scale this image.
        rd_scale = np.random.choice(random_scale)
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        text_polys *= rd_scale
        # Randomly crop an area from the image.
        if np.random.rand() < background_ratio:
            # crop background
            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=True)
            if text_polys.shape[0] > 0:
                # cannot find background
                return None, None, None, None, None
            # pad and resize image
            new_h, new_w, _ = im.shape
            max_h_w_i = np.max([new_h, new_w, input_size])
            im_padded = np.zeros((max_h_w_i, max_h_w_i, 3), dtype=np.uint8)
            im_padded[:new_h, :new_w, :] = im.copy()
            im = cv2.resize(im_padded, dsize=(input_size, input_size))
            score_map = np.zeros((input_size, input_size), dtype=np.uint8)
            geo_map_channels = 5 if cfg.geometry == 'RBOX' else 8
            geo_map = np.zeros((input_size, input_size, geo_map_channels), dtype=np.float32)
            training_mask = np.ones((input_size, input_size), dtype=np.uint8)
        else:
            # Crop an area that contains text.
            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=False)
            if text_polys.shape[0] == 0:
                # cannot find a crop with text
                return None, None, None, None, None
            # pad the image to the training input size or the longer side of image
            new_h, new_w, _ = im.shape
            max_h_w_i = np.max([new_h, new_w, input_size])
            im_padded = np.zeros((max_h_w_i, max_h_w_i, 3), dtype=np.uint8)
            im_padded[:new_h, :new_w, :] = im.copy()
            im = im_padded
            # resize the image to input size
            new_h, new_w, _ = im.shape
            resize_h = input_size
            resize_w = input_size
            im = cv2.resize(im, dsize=(resize_w, resize_h))
            # Scale the box coordinates by the same ratio.
            resize_ratio_3_x = resize_w / float(new_w)
            resize_ratio_3_y = resize_h / float(new_h)
            text_polys[:, :, 0] *= resize_ratio_3_x
            text_polys[:, :, 1] *= resize_ratio_3_y
            new_h, new_w, _ = im.shape
            score_map, geo_map, training_mask = generate_rbox((new_h, new_w), text_polys, text_tags)

        # Assemble the sample: image with reversed channel order plus labels.
        images = im[:, :, ::-1].astype(np.float32)
        image_fns = im_fn
        # Keep every fourth row and column of the label maps.
        score_maps = score_map[::4, ::4, np.newaxis].astype(np.float32)
        geo_maps = geo_map[::4, ::4, :].astype(np.float32)
        training_masks = training_mask[::4, ::4, np.newaxis].astype(np.float32)
        return images, image_fns, score_maps, geo_maps, training_masks
    except Exception:
        # Best effort: a bad sample is reported and skipped, not fatal.
        import traceback
        traceback.print_exc()
        return None, None, None, None, None