Spaces:

shreyasvaidya
/

Scene-Text-Translator

Running

App Files Files Community

Scene-Text-Translator / IndicPhotoOCR /detection /east_preprossing.py

shreyasvaidya

Upload folder using huggingface_hub

01bb3bb verified about 1 month ago

raw

history blame

28.1 kB


	# coding:utf-8
	import glob
	import csv
	import cv2
	import os
	import numpy as np
	from shapely.geometry import Polygon


	from IndicPhotoOCR.detection import east_config as cfg
	from IndicPhotoOCR.detection import east_utils


	def get_images(img_root):
	    '''
	    Collect the image files that sit directly inside a directory.
	    Only the ``jpg`` extension is searched; sub-directories are not visited.
	    :param img_root: directory to search
	    :return: list of matching paths, in the order ``glob.glob`` reports them
	    '''
	    extensions = ['jpg']
	    matches = []
	    for suffix in extensions:
	        pattern = os.path.join(img_root, '*.{}'.format(suffix))
	        matches += glob.glob(pattern)
	    return matches


	def load_annoataion(p):
	    '''
	    Load quadrilateral text annotations from a comma-separated text file.
	    Every non-empty row must start with eight numbers
	    ``x1, y1, x2, y2, x3, y3, x4, y4``; the last field of the row is the label.
	    Rows labelled ``*`` or ``###`` are tagged True, every other row False.
	    :param p: path of the annotation file
	    :return: tuple ``(text_polys, text_tags)``; ``text_polys`` is a float32
	        array with one 4x2 quadrilateral per row and ``text_tags`` a bool array
	        of the same length. Both are empty when the file does not exist.
	    '''
	    text_polys = []
	    text_tags = []
	    if os.path.exists(p):
	        with open(p, 'r', encoding='UTF-8') as f:
	            for line in csv.reader(f):
	                if not line:
	                    # csv.reader yields [] for blank lines; nothing to parse
	                    continue
	                label = line[-1]
	                # strip BOM. \ufeff for python3, \xef\xbb\bf for python2
	                line = [i.strip('\ufeff').strip('\xef\xbb\xbf') for i in line]
	                x1, y1, x2, y2, x3, y3, x4, y4 = map(float, line[:8])
	                text_polys.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
	                text_tags.append(label == '*' or label == '###')
	    # always return the pair so callers can unpack it, and use the builtin
	    # bool because the np.bool alias no longer exists in current NumPy
	    return np.array(text_polys, dtype=np.float32), np.array(text_tags, dtype=bool)


	def polygon_area(poly):
	    '''
	    Signed area of a quadrilateral, summed edge by edge (trapezoid formula).
	    The sign follows the vertex order: [[0, 0], [1, 0], [1, 1], [0, 1]]
	    gives -1.0 and the reversed order gives +1.0.
	    :param poly: four (x, y) points
	    :return: signed area
	    '''
	    terms = []
	    for idx in range(4):
	        here = poly[idx]
	        there = poly[(idx + 1) % 4]
	        terms.append((there[0] - here[0]) * (there[1] + here[1]))
	    return np.sum(terms) / 2.


	def check_and_validate_polys(polys, tags, xxx_todo_changeme):
	    '''
	    Clip the polygons to the image, drop degenerate ones and make all of
	    them run in the same direction.
	    ``polys`` is clipped in place. A polygon whose absolute area is below 1
	    is discarded; one whose ``polygon_area`` is positive has its vertex
	    order reversed (0, 3, 2, 1).
	    :param polys: array of quadrilaterals, indexed as polys[n, point, xy]
	    :param tags: one tag per polygon
	    :param xxx_todo_changeme: ``(h, w)`` of the image
	    :return: tuple ``(validated_polys, validated_tags)``; for empty input the
	        inputs are returned as-is
	    '''
	    (h, w) = xxx_todo_changeme
	    if polys.shape[0] == 0:
	        # return the pair (not just polys) so callers can always unpack two values
	        return polys, tags
	    polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
	    polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)

	    validated_polys = []
	    validated_tags = []

	    # check the winding direction of each quadrilateral and whether it is valid
	    for poly, tag in zip(polys, tags):
	        p_area = polygon_area(poly)
	        if abs(p_area) < 1:
	            print('invalid poly')
	            continue
	        if p_area > 0:
	            print('poly in wrong direction')
	            poly = poly[(0, 3, 2, 1), :]
	        validated_polys.append(poly)
	        validated_tags.append(tag)
	    return np.array(validated_polys), np.array(validated_tags)


	def crop_area(im, polys, tags, crop_background=False, max_tries=100):
	    '''
	    Pick a random crop whose borders do not cut through any text polygon.
	    Rows and columns spanned by a polygon are marked as taken; crop borders
	    are drawn only from the remaining free rows/columns (a margin of one
	    tenth of the image size is added on each side before sampling).
	    :param im: image array of shape (h, w, channels)
	    :param polys: array of quadrilaterals, indexed as polys[n, point, xy]
	    :param tags: one tag per polygon
	    :param crop_background: if True, a crop that contains no polygon is
	        returned; if False such crops are rejected and another is tried
	    :param max_tries: maximum number of random crops to try
	    :return: ``(im, polys, tags)`` for the crop, with polygon coordinates
	        shifted to the crop origin; the inputs are returned unchanged when
	        no free row/column exists or every try is rejected
	    '''
	    height, width, _ = im.shape
	    pad_h, pad_w = height // 10, width // 10
	    rows_taken = np.zeros((height + pad_h * 2), dtype=np.int32)
	    cols_taken = np.zeros((width + pad_w * 2), dtype=np.int32)
	    for quad in polys:
	        quad = np.round(quad, decimals=0).astype(np.int32)
	        xs, ys = quad[:, 0], quad[:, 1]
	        cols_taken[np.min(xs) + pad_w:np.max(xs) + pad_w] = 1
	        rows_taken[np.min(ys) + pad_h:np.max(ys) + pad_h] = 1
	    # ensure the cropped area does not cut across a text
	    free_rows = np.flatnonzero(rows_taken == 0)
	    free_cols = np.flatnonzero(cols_taken == 0)
	    if len(free_rows) == 0 or len(free_cols) == 0:
	        return im, polys, tags

	    for _attempt in range(max_tries):
	        x_pair = np.random.choice(free_cols, size=2)
	        xmin = np.clip(np.min(x_pair) - pad_w, 0, width - 1)
	        xmax = np.clip(np.max(x_pair) - pad_w, 0, width - 1)
	        y_pair = np.random.choice(free_rows, size=2)
	        ymin = np.clip(np.min(y_pair) - pad_h, 0, height - 1)
	        ymax = np.clip(np.max(y_pair) - pad_h, 0, height - 1)
	        if xmax - xmin < cfg.min_crop_side_ratio * width or ymax - ymin < cfg.min_crop_side_ratio * height:
	            # area too small
	            continue
	        if polys.shape[0] == 0:
	            selected = []
	        else:
	            inside = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
	                & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
	            # keep polygons whose four points all lie inside the crop
	            selected = np.where(np.sum(inside, axis=1) == 4)[0]
	        if len(selected) == 0:
	            # no text in this area
	            if not crop_background:
	                continue
	            return im[ymin:ymax + 1, xmin:xmax + 1, :], polys[selected], tags[selected]
	        im = im[ymin:ymax + 1, xmin:xmax + 1, :]
	        polys = polys[selected]
	        tags = tags[selected]
	        polys[:, :, 0] -= xmin
	        polys[:, :, 1] -= ymin
	        return im, polys, tags

	    return im, polys, tags


	def shrink_poly(poly, r):
	    '''
	    Fit a smaller quadrilateral inside the given one; used to generate the
	    score map. ``poly`` is modified in place and also returned.
	    Each edge has its two end points pulled towards each other by
	    ``0.3 * r[i]``. The pair of opposite edges with the larger total length
	    is processed first, then the other pair.
	    :param poly: the text poly, four (x, y) points
	    :param r: per-vertex reference length (r in the paper)
	    :return: the shrinked poly
	    '''
	    # shrink ratio
	    R = 0.3

	    def _move_dy_dx(a, b):
	        # edges (p0, p1) and (p3, p2): angle taken as arctan2(dy, dx)
	        theta = np.arctan2((poly[b][1] - poly[a][1]), (poly[b][0] - poly[a][0]))
	        poly[a][0] += R * r[a] * np.cos(theta)
	        poly[a][1] += R * r[a] * np.sin(theta)
	        poly[b][0] -= R * r[b] * np.cos(theta)
	        poly[b][1] -= R * r[b] * np.sin(theta)

	    def _move_dx_dy(a, b):
	        # edges (p0, p3) and (p1, p2): angle taken as arctan2(dx, dy)
	        theta = np.arctan2((poly[b][0] - poly[a][0]), (poly[b][1] - poly[a][1]))
	        poly[a][0] += R * r[a] * np.sin(theta)
	        poly[a][1] += R * r[a] * np.cos(theta)
	        poly[b][0] -= R * r[b] * np.sin(theta)
	        poly[b][1] -= R * r[b] * np.cos(theta)

	    # find the longer pair
	    len_01_23 = np.linalg.norm(poly[0] - poly[1]) + np.linalg.norm(poly[2] - poly[3])
	    len_03_12 = np.linalg.norm(poly[0] - poly[3]) + np.linalg.norm(poly[1] - poly[2])
	    if len_01_23 > len_03_12:
	        # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
	        _move_dy_dx(0, 1)
	        _move_dy_dx(3, 2)
	        _move_dx_dy(0, 3)
	        _move_dx_dy(1, 2)
	    else:
	        # first move (p0, p3), (p1, p2), then (p0, p1), (p2, p3)
	        _move_dx_dy(0, 3)
	        _move_dx_dy(1, 2)
	        _move_dy_dx(0, 1)
	        _move_dy_dx(3, 2)
	    return poly


	# def point_dist_to_line(p1, p2, p3):
	# # compute the distance from p3 to p1-p2
	# return np.linalg.norm(np.cross(p2 - p1, p1 - p3)) / np.linalg.norm(p2 - p1)


	# Distance from point p3 to the line through p1 and p2
	def point_dist_to_line(p1, p2, p3):
	    # Distance from p3 to the line p1-p2, computed as the height of the
	    # triangle (p1, p2, p3) over the base p1-p2, with the triangle area
	    # taken from Heron's formula.
	    # When the base is shorter than 1.0 the mean of the distances from p3
	    # to p1 and to p2 is returned instead.
	    base = np.linalg.norm(p1 - p2)
	    to_p2 = np.linalg.norm(p2 - p3)
	    to_p1 = np.linalg.norm(p3 - p1)
	    if base < 1.0:
	        return (to_p2 + to_p1) / 2.0
	    half = (base + to_p2 + to_p1) / 2.0
	    heron = half * (half - base) * (half - to_p2) * (half - to_p1)
	    # abs() guards against a slightly negative product from rounding
	    tri_area = np.abs(heron) ** 0.5
	    return 2 * tri_area / base


	def fit_line(p1, p2):
	    # Fit a line a*x + b*y + c = 0 and return [a, b, c].
	    # Note the argument layout: p1 holds the two x coordinates and p2 the
	    # two y coordinates (they are passed to np.polyfit as x and y).
	    # Equal x coordinates give the vertical line [1, 0, -x]; otherwise the
	    # result is [k, -1, b] for y = k*x + b.
	    xs, ys = p1, p2
	    if xs[0] == xs[1]:
	        return [1., 0., -xs[0]]
	    slope, intercept = np.polyfit(xs, ys, deg=1)
	    return [slope, -1., intercept]


	def line_cross_point(line1, line2):
	    # Compute the crossing point of two lines given as [a, b, c] for
	    # a*x + b*y + c = 0, in the forms produced by fit_line / line_verticle:
	    # [1, 0, -x0] for a vertical line and [k, -1, b] for y = k*x + b.
	    # Returns a float32 array [x, y], or None (after printing a message)
	    # when the lines are parallel.
	    vertical1 = line1[1] == 0
	    vertical2 = line2[1] == 0
	    if vertical1 and vertical2:
	        print('Cross point does not exist')
	        return None
	    # Slopes are only comparable when neither line is vertical: a vertical
	    # line stores 1 in slot 0, which must not be mistaken for slope 1.
	    if not vertical1 and not vertical2 and line1[0] == line2[0]:
	        print('Cross point does not exist')
	        return None
	    if vertical1:
	        x = -line1[2]
	        y = line2[0] * x + line2[2]
	    elif vertical2:
	        x = -line2[2]
	        y = line1[0] * x + line1[2]
	    else:
	        k1, _, b1 = line1
	        k2, _, b2 = line2
	        x = -(b1 - b2) / (k1 - k2)
	        y = k1 * x + b1
	    return np.array([x, y], dtype=np.float32)


	def line_verticle(line, point):
	    # Return the line perpendicular to `line` that passes through `point`,
	    # in the same [a, b, c] form (a*x + b*y + c = 0).
	    # vertical input (b == 0)   -> horizontal line through point
	    # horizontal input (a == 0) -> vertical line through point
	    # otherwise                 -> slope -1/a through point
	    if line[1] == 0:
	        return [0, -1, point[1]]
	    if line[0] == 0:
	        return [1, 0, -point[0]]
	    return [-1. / line[0], -1, point[1] - (-1 / line[0] * point[0])]


	def rectangle_from_parallelogram(poly):
	    '''
	    Fit a rectangle from a parallelogram.
	    Two opposite corners are kept and the other two are replaced by the
	    feet of perpendiculars dropped onto the parallelogram's sides. Which
	    corners are kept depends on whether the angle at p0 is below 90 degrees
	    and on which of the sides p0-p1 / p0-p3 is longer.
	    :param poly: four points p0, p1, p2, p3
	    :return: float32 array with the four rectangle points
	    '''
	    p0, p1, p2, p3 = poly

	    def _foot(a, b, through):
	        # crossing of the line a-b with its perpendicular through `through`
	        side = fit_line([a[0], b[0]], [a[1], b[1]])
	        return line_cross_point(side, line_verticle(side, through))

	    cos_p0 = np.dot(p1 - p0, p3 - p0) / (np.linalg.norm(p0 - p1) * np.linalg.norm(p3 - p0))
	    angle_p0 = np.arccos(cos_p0)
	    p0p1_longer = np.linalg.norm(p0 - p1) > np.linalg.norm(p0 - p3)

	    if angle_p0 < 0.5 * np.pi:
	        # keep p0 and p2
	        if p0p1_longer:
	            new_p3 = _foot(p2, p3, p0)
	            new_p1 = _foot(p0, p1, p2)
	        else:
	            new_p1 = _foot(p1, p2, p0)
	            new_p3 = _foot(p0, p3, p2)
	        return np.array([p0, new_p1, p2, new_p3], dtype=np.float32)

	    # keep p1 and p3
	    if p0p1_longer:
	        new_p2 = _foot(p2, p3, p1)
	        new_p0 = _foot(p0, p1, p3)
	    else:
	        new_p0 = _foot(p0, p3, p1)
	        new_p2 = _foot(p1, p2, p3)
	    return np.array([new_p0, p1, new_p2, p3], dtype=np.float32)


	def sort_rectangle(poly):
	    # Reorder the four corners of a rectangle and return (sorted_poly, angle).
	    # The points in poly must already be sorted clockwise; only the starting
	    # corner is changed, the cyclic order is kept.
	    def _starting_at(first):
	        return poly[[(first + step) % 4 for step in range(4)]]

	    # First find the lowest point (largest y)
	    lowest = np.argmax(poly[:, 1])
	    if np.count_nonzero(poly[:, 1] == poly[lowest, 1]) == 2:
	        # the bottom side is parallel to the x-axis: p0 is the corner with
	        # the smallest x + y and the angle is 0
	        return _starting_at(np.argmin(np.sum(poly, axis=1))), 0.

	    # the point that sits to the right of the lowest point
	    right = (lowest - 1) % 4
	    angle = np.arctan(
	        -(poly[lowest][1] - poly[right][1]) / (poly[lowest][0] - poly[right][0]))
	    if angle <= 0:
	        print(angle, poly[lowest], poly[right])
	    if angle / np.pi * 180 > 45:
	        # the lowest point is p2
	        return _starting_at((lowest - 2) % 4), -(np.pi / 2 - angle)
	    # the lowest point is p3
	    return _starting_at((lowest + 1) % 4), angle


	def restore_rectangle_rbox(origin, geometry):
	    '''
	    Rebuild rectangle corner points from per-point RBOX geometry.
	    Each geometry row is ``(d0, d1, d2, d3, angle)``. Rows with angle >= 0
	    and rows with angle < 0 use different corner templates and rotation
	    signs; the output lists all angle >= 0 boxes first, then the others.
	    :param origin: array of (x, y) points, one per geometry row
	    :param geometry: array with five columns as described above
	    :return: array of shape (N, 4, 2) with the four corners of every box
	    '''
	    d = geometry[:, :4]
	    angle = geometry[:, 4]

	    def _place(template, rot_x, rot_y, anchors):
	        # template: 10 rows (x0, y0, ..., x4, y4), one column per box -> N x 5 x 2
	        pts = template.transpose((1, 0)).reshape((-1, 5, 2))
	        mat_x = np.repeat(rot_x.transpose((1, 0)), 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))
	        mat_y = np.repeat(rot_y.transpose((1, 0)), 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))
	        x_rot = np.sum(mat_x * pts, axis=2)[:, :, np.newaxis]
	        y_rot = np.sum(mat_y * pts, axis=2)[:, :, np.newaxis]
	        rotated = np.concatenate([x_rot, y_rot], axis=2)
	        # the fifth template point is the origin itself; shift so they coincide
	        shift = anchors - rotated[:, 4, :]
	        corners = [rotated[:, k, :] + shift for k in range(4)]
	        return np.concatenate([c[:, np.newaxis, :] for c in corners], axis=1)

	    # for angle >= 0
	    keep = angle >= 0
	    origin_0, d_0, angle_0 = origin[keep], d[keep], angle[keep]
	    if origin_0.shape[0] > 0:
	        template = np.array([np.zeros(d_0.shape[0]), -d_0[:, 0] - d_0[:, 2],
	                             d_0[:, 1] + d_0[:, 3], -d_0[:, 0] - d_0[:, 2],
	                             d_0[:, 1] + d_0[:, 3], np.zeros(d_0.shape[0]),
	                             np.zeros(d_0.shape[0]), np.zeros(d_0.shape[0]),
	                             d_0[:, 3], -d_0[:, 2]])
	        new_p_0 = _place(template,
	                         np.array([np.cos(angle_0), np.sin(angle_0)]),
	                         np.array([-np.sin(angle_0), np.cos(angle_0)]),
	                         origin_0)
	    else:
	        new_p_0 = np.zeros((0, 4, 2))

	    # for angle < 0
	    keep = angle < 0
	    origin_1, d_1, angle_1 = origin[keep], d[keep], angle[keep]
	    if origin_1.shape[0] > 0:
	        template = np.array([-d_1[:, 1] - d_1[:, 3], -d_1[:, 0] - d_1[:, 2],
	                             np.zeros(d_1.shape[0]), -d_1[:, 0] - d_1[:, 2],
	                             np.zeros(d_1.shape[0]), np.zeros(d_1.shape[0]),
	                             -d_1[:, 1] - d_1[:, 3], np.zeros(d_1.shape[0]),
	                             -d_1[:, 1], -d_1[:, 2]])
	        new_p_1 = _place(template,
	                         np.array([np.cos(-angle_1), -np.sin(-angle_1)]),
	                         np.array([np.sin(-angle_1), np.cos(-angle_1)]),
	                         origin_1)
	    else:
	        new_p_1 = np.zeros((0, 4, 2))
	    return np.concatenate([new_p_0, new_p_1])


	def restore_rectangle(origin, geometry):
	    '''
	    Restore rectangles from geometry; delegates directly to
	    restore_rectangle_rbox with the same arguments.
	    :param origin: passed through to restore_rectangle_rbox
	    :param geometry: passed through to restore_rectangle_rbox
	    :return: whatever restore_rectangle_rbox returns
	    '''
	    return restore_rectangle_rbox(origin, geometry)


	def generate_rbox(im_size, polys, tags):
	    '''
	    Build the training targets for one image from its text polygons.
	    For every polygon the shrunk polygon is filled into the score map, the
	    smallest of eight candidate parallelograms is turned into a rectangle,
	    and every pixel of the shrunk polygon gets its distances to the four
	    rectangle sides plus the rectangle angle written into the geometry map.
	    :param im_size: ``(h, w)`` of the maps to create
	    :param polys: iterable of quadrilaterals (four (x, y) points each)
	    :param tags: one tag per polygon; a truthy tag masks the polygon out
	    :return: ``(score_map, geo_map, training_mask)``: uint8 (h, w) map set to
	        1 inside shrunk polygons, float32 (h, w, 5) map holding
	        (top, right, down, left, angle), and uint8 (h, w) mask that is 0
	        where training should ignore the pixel
	    '''
	    h, w = im_size
	    # NOTE(review): poly_mask is uint8 and stores poly_idx + 1, so labels
	    # above 255 cannot be represented - confirm images never have that many
	    # polygons.
	    poly_mask = np.zeros((h, w), dtype=np.uint8)
	    score_map = np.zeros((h, w), dtype=np.uint8)
	    geo_map = np.zeros((h, w, 5), dtype=np.float32)
	    # mask used during training, to ignore some hard areas (e.g. text that is too small)
	    training_mask = np.ones((h, w), dtype=np.uint8)
	    for poly_idx, poly_tag in enumerate(zip(polys, tags)):
	        poly = poly_tag[0]
	        tag = poly_tag[1]

	        # for each vertex, find the shorter of the two edges that meet at it
	        r = [None, None, None, None]
	        for i in range(4):
	            r[i] = min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
	                       np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
	        # score map
	        # shrink the box (see shrink_poly) and fill the corresponding region of the score map
	        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
	        cv2.fillPoly(score_map, shrinked_poly, 1)
	        cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
	        # if the poly is too small, then ignore it during training
	        # the same masking is applied when the tag is truthy (label '*' or '###' in the annotation file)
	        poly_h = min(np.linalg.norm(poly[0] - poly[3]), np.linalg.norm(poly[1] - poly[2]))
	        poly_w = min(np.linalg.norm(poly[0] - poly[1]), np.linalg.norm(poly[2] - poly[3]))
	        if min(poly_h, poly_w) < cfg.min_text_size:
	            cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
	        if tag:
	            cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)

	        # pixels that belong to the text box just added
	        xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
	        # if geometry == 'RBOX':
	        # generate a parallelogram for any combination of two vertices
	        fitted_parallelograms = []
	        for i in range(4):
	            # take the edge p0-p1 and build two parallelograms from it
	            p0 = poly[i]
	            p1 = poly[(i + 1) % 4]
	            p2 = poly[(i + 2) % 4]
	            p3 = poly[(i + 3) % 4]
	            # fit lines of the form ax + by + c = 0
	            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
	            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
	            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])
	            # the distances of the other two points from edge decide whether
	            # the line parallel to edge passes through p2 or p3
	            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
	                # the parallel line passes through p2
	                if edge[1] == 0:
	                    edge_opposite = [1, 0, -p2[0]]
	                else:
	                    edge_opposite = [edge[0], -1, p2[1] - edge[0] * p2[0]]
	            else:
	                # the parallel line passes through p3
	                if edge[1] == 0:
	                    edge_opposite = [1, 0, -p3[0]]
	                else:
	                    edge_opposite = [edge[0], -1, p3[1] - edge[0] * p3[0]]
	            # move forward edge
	            new_p0 = p0
	            new_p1 = p1
	            new_p2 = p2
	            new_p3 = p3
	            new_p2 = line_cross_point(forward_edge, edge_opposite)
	            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
	                # across p0
	                if forward_edge[1] == 0:
	                    forward_opposite = [1, 0, -p0[0]]
	                else:
	                    forward_opposite = [forward_edge[0], -1, p0[1] - forward_edge[0] * p0[0]]
	            else:
	                # across p3
	                if forward_edge[1] == 0:
	                    forward_opposite = [1, 0, -p3[0]]
	                else:
	                    forward_opposite = [forward_edge[0], -1, p3[1] - forward_edge[0] * p3[0]]
	            new_p0 = line_cross_point(forward_opposite, edge)
	            new_p3 = line_cross_point(forward_opposite, edge_opposite)
	            # the first point is repeated at the end; it is dropped again with [:-1] below
	            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])
	            # or move backward edge
	            new_p0 = p0
	            new_p1 = p1
	            new_p2 = p2
	            new_p3 = p3
	            new_p3 = line_cross_point(backward_edge, edge_opposite)
	            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
	                # across p1
	                if backward_edge[1] == 0:
	                    backward_opposite = [1, 0, -p1[0]]
	                else:
	                    backward_opposite = [backward_edge[0], -1, p1[1] - backward_edge[0] * p1[0]]
	            else:
	                # across p2
	                if backward_edge[1] == 0:
	                    backward_opposite = [1, 0, -p2[0]]
	                else:
	                    backward_opposite = [backward_edge[0], -1, p2[1] - backward_edge[0] * p2[0]]
	            new_p1 = line_cross_point(backward_opposite, edge)
	            new_p2 = line_cross_point(backward_opposite, edge_opposite)
	            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])

	        # pick the parallelogram with the smallest area
	        areas = [Polygon(t).area for t in fitted_parallelograms]
	        parallelogram = np.array(fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)
	        # sort this polygon: start from the point with the smallest x + y
	        parallelogram_coord_sum = np.sum(parallelogram, axis=1)
	        min_coord_idx = np.argmin(parallelogram_coord_sum)
	        parallelogram = parallelogram[
	            [min_coord_idx, (min_coord_idx + 1) % 4, (min_coord_idx + 2) % 4, (min_coord_idx + 3) % 4]]

	        # get the enclosing rectangle and its rotation angle
	        rectange = rectangle_from_parallelogram(parallelogram)
	        rectange, rotate_angle = sort_rectangle(rectange)

	        p0_rect, p1_rect, p2_rect, p3_rect = rectange
	        # for each pixel of the newly added text box, write its distances
	        # to the four rectangle sides into geo_map
	        for y, x in xy_in_poly:
	            point = np.array([x, y], dtype=np.float32)
	            # top
	            geo_map[y, x, 0] = point_dist_to_line(p0_rect, p1_rect, point)
	            # right
	            geo_map[y, x, 1] = point_dist_to_line(p1_rect, p2_rect, point)
	            # down
	            geo_map[y, x, 2] = point_dist_to_line(p2_rect, p3_rect, point)
	            # left
	            geo_map[y, x, 3] = point_dist_to_line(p3_rect, p0_rect, point)
	            # angle
	            geo_map[y, x, 4] = rotate_angle
	    return score_map, geo_map, training_mask


	def generator(index,
	              input_size=512,
	              background_ratio=3. / 8,  # fraction of samples cropped as pure background
	              random_scale=np.array([0.5, 1, 2.0, 3.0]),  # scales to pick from (multi-scale training)
	              image_list=None):
	    '''
	    Build one training sample from ``image_list[index]``.
	    The image is read, its annotation file (same path with the extension
	    replaced by ``.txt``) is loaded and validated, the image is randomly
	    rescaled and cropped, padded to a square, resized to ``input_size`` and
	    the target maps are produced. Any exception is caught, its traceback is
	    printed and a tuple of five ``None`` is returned.
	    :param index: position of the image in ``image_list``
	    :param input_size: side length of the square output image
	    :param background_ratio: probability of producing a background-only sample
	    :param random_scale: candidate scale factors, one is chosen at random
	    :param image_list: list of image paths
	    :return: ``(images, image_fns, score_maps, geo_maps, training_masks)``:
	        the image with its channel order reversed as float32, the image
	        path, and the three maps sampled at every 4th row/column as float32;
	        or five ``None`` when the sample is rejected or an error occurs
	    '''
	    failed = (None, None, None, None, None)

	    def _pad_to_square(image):
	        # place the image in the top-left corner of a black square whose side
	        # is the larger of the image's longer side and input_size
	        img_h, img_w, _ = image.shape
	        side = np.max([img_h, img_w, input_size])
	        canvas = np.zeros((side, side, 3), dtype=np.uint8)
	        canvas[:img_h, :img_w, :] = image
	        return canvas

	    try:
	        im_fn = image_list[index]
	        im = cv2.imread(im_fn)
	        if im is None:
	            print("can't find image")
	            return failed
	        h, w, _ = im.shape
	        # Swap only the file extension. The previous str.replace on the
	        # extension text also rewrote matching text elsewhere in the path
	        # and broke on basenames with several dots or none.
	        txt_fn = os.path.splitext(im_fn)[0] + '.txt'
	        if not os.path.exists(txt_fn):
	            print('text file {} does not exists'.format(txt_fn))
	            return failed
	        # load the annotated boxes
	        text_polys, text_tags = load_annoataion(txt_fn)

	        text_polys, text_tags = check_and_validate_polys(text_polys, text_tags, (h, w))

	        # random scale this image: pick one of the scales at random
	        rd_scale = np.random.choice(random_scale)
	        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
	        text_polys *= rd_scale

	        # random crop an area from the image
	        if np.random.rand() < background_ratio:
	            # crop background
	            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=True)
	            if text_polys.shape[0] > 0:
	                # could not find a text-free crop
	                return failed
	            # pad and resize image to input_size x input_size
	            im = cv2.resize(_pad_to_square(im), dsize=(input_size, input_size))
	            score_map = np.zeros((input_size, input_size), dtype=np.uint8)
	            geo_map_channels = 5 if cfg.geometry == 'RBOX' else 8
	            geo_map = np.zeros((input_size, input_size, geo_map_channels), dtype=np.float32)
	            training_mask = np.ones((input_size, input_size), dtype=np.uint8)
	        else:
	            # crop an area that contains text
	            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=False)
	            if text_polys.shape[0] == 0:
	                # could not find a crop with text
	                return failed
	            # pad the image to the training input size or the longer side of image
	            im = _pad_to_square(im)
	            # resize the image to input size
	            new_h, new_w, _ = im.shape
	            resize_h = input_size
	            resize_w = input_size
	            im = cv2.resize(im, dsize=(resize_w, resize_h))
	            # scale the box coordinates by the same ratio
	            resize_ratio_3_x = resize_w / float(new_w)
	            resize_ratio_3_y = resize_h / float(new_h)
	            text_polys[:, :, 0] *= resize_ratio_3_x
	            text_polys[:, :, 1] *= resize_ratio_3_y
	            new_h, new_w, _ = im.shape
	            score_map, geo_map, training_mask = generate_rbox((new_h, new_w), text_polys, text_tags)

	        # reverse the channel order (cv2.imread loads BGR, so this yields RGB)
	        images = im[:, :, ::-1].astype(np.float32)
	        image_fns = im_fn
	        # keep every 4th row and column of the maps
	        score_maps = score_map[::4, ::4, np.newaxis].astype(np.float32)
	        geo_maps = geo_map[::4, ::4, :].astype(np.float32)
	        training_masks = training_mask[::4, ::4, np.newaxis].astype(np.float32)
	        return images, image_fns, score_maps, geo_maps, training_masks

	    except Exception:
	        # best effort: report the problem and let the caller skip this sample
	        import traceback
	        traceback.print_exc()
	        return failed