"""Download the images referenced in a GRIT-style JSONL file and render their
grounded referring expressions (bounding boxes plus phrase labels) on top of
the downloaded images."""

import json
import os
import sys
import ast
import io
import base64
import textwrap
from pathlib import Path
from urllib.parse import urlparse

import requests
from requests.exceptions import RequestException

import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab

# Use a large default figure size so captions and boxes stay readable.
pylab.rcParams['figure.figsize'] = 20, 12

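# Assumed input record layout (inferred from the field accesses below; the exact
# schema depends on how the JSONL was produced, e.g. a GRIT/COYO-style export).
# Each line is one JSON object such as:
#   {
#     "url": "...",       # source image URL
#     "key": "...",        # unique sample id, used to name the saved files
#     "caption": "...",    # full image caption
#     "ref_exps": [[phrase_start, phrase_end, x1, y1, x2, y2, score], ...]
#   }
# where (phrase_start, phrase_end) index characters of the caption and the box
# coordinates are normalized to [0, 1].
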
def download_images_from_jsonl(jsonl_path, output_folder):
    """Iterate over a JSONL file and visualize every record in it."""
    with open(jsonl_path, 'r') as jsonl_file:
        for line in jsonl_file:
            json_obj = json.loads(line)
            vis_image(json_obj, output_folder)


def download_image(url, output_folder):
    """Download a single image and save it under its original file name."""
    try:
        response = requests.get(url)
        response.raise_for_status()
    except RequestException as e:
        # Covers connection problems as well as the non-2xx responses raised above.
        print(f"Error while downloading {url}: {e}")
        return

    file_name = os.path.basename(urlparse(url).path)
    output_path = os.path.join(output_folder, file_name)

    with open(output_path, 'wb') as file:
        file.write(response.content)


def imshow(img, file_name="tmp.jpg", caption='test'):
    """Save a BGR image to disk with the (wrapped) caption rendered below it."""
    fig, ax = plt.subplots()

    # The drawing code works in BGR (OpenCV order); flip back to RGB for matplotlib.
    ax.imshow(img[:, :, [2, 1, 0]])
    ax.set_axis_off()

    ax.text(0.5, -0.2, '\n'.join(textwrap.wrap(caption, 120)),
            ha='center', transform=ax.transAxes, fontsize=18)
    plt.savefig(file_name, bbox_inches='tight')
    plt.close()


def vis_image(json_obj, output_folder):
    """Download the image for one JSONL record and draw its referring expressions."""
    url = json_obj['url']
    try:
        response = requests.get(url)
        response.raise_for_status()

        file_name = os.path.basename(urlparse(url).path)
        # Name the saved file after the record key, keeping the original extension.
        file_key_name = json_obj['key'] + os.path.splitext(file_name)[1]
        output_path = os.path.join(output_folder, file_key_name)
    except Exception as e:
        print(f"Error while downloading {url}: {e}")
        return

    with open(output_path, 'wb') as file:
        file.write(response.content)

    try:
        pil_img = Image.open(output_path).convert("RGB")
    except Exception:
        # Skip records whose payload is not a decodable image.
        return

    # Work in BGR so the OpenCV drawing calls below behave as expected.
    image = np.array(pil_img)[:, :, [2, 1, 0]]
    image_h = pil_img.height
    image_w = pil_img.width
    caption = json_obj['caption']

    def is_overlapping(rect1, rect2):
        x1, y1, x2, y2 = rect1
        x3, y3, x4, y4 = rect2
        return not (x2 < x3 or x1 > x4 or y2 < y3 or y1 > y4)

    grounding_list = json_obj['ref_exps']
    new_image = image.copy()
    previous_locations = []
    previous_bboxes = []
    text_offset = 10
    text_offset_original = 4
    # Scale text and line thickness with the image size.
    text_size = max(0.07 * min(image_h, image_w) / 100, 0.5)
    text_line = int(max(1 * min(image_h, image_w) / 512, 1))
    box_line = int(max(2 * min(image_h, image_w) / 512, 2))
    text_height = text_offset  # placeholder until cv2.getTextSize runs below

    for (phrase_s, phrase_e, x1_norm, y1_norm, x2_norm, y2_norm, score) in grounding_list:
        phrase = caption[phrase_s:phrase_e]
        # Convert normalized coordinates to pixel coordinates.
        x1, y1, x2, y2 = int(x1_norm * image_w), int(y1_norm * image_h), int(x2_norm * image_w), int(y2_norm * image_h)
        print(f"Decode results: {phrase} - {[x1, y1, x2, y2]}")

        # Draw the bounding box in a random color.
        color = tuple(np.random.randint(0, 255, size=3).tolist())
        new_image = cv2.rectangle(new_image, (x1, y1), (x2, y2), color, box_line)

        # Nudge the label down if it would start too close to an earlier one.
        for x_prev, y_prev in previous_locations:
            if abs(x1 - x_prev) < abs(text_offset) and abs(y1 - y_prev) < abs(text_offset):
                y1 += text_height

        if y1 < 2 * text_offset:
            y1 += text_offset + text_offset_original

        (text_width, text_height), _ = cv2.getTextSize(phrase, cv2.FONT_HERSHEY_SIMPLEX, text_size, text_line)
        text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2 = x1, y1 - text_height - text_offset_original, x1 + text_width, y1

        # Shift the label background downward until it no longer overlaps a previous label.
        for prev_bbox in previous_bboxes:
            while is_overlapping((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox):
                text_bg_y1 += text_offset
                text_bg_y2 += text_offset
                y1 += text_offset

                if text_bg_y2 >= image_h:
                    # Ran out of room: pin the label to the bottom edge and stop.
                    text_bg_y1 = max(0, image_h - text_height - text_offset_original)
                    text_bg_y2 = image_h
                    y1 = max(0, image_h - text_height - text_offset_original + text_offset)
                    break

        # Blend a semi-transparent background rectangle behind the label text.
        alpha = 0.5
        for i in range(text_bg_y1, text_bg_y2):
            for j in range(text_bg_x1, text_bg_x2):
                if i < image_h and j < image_w:
                    new_image[i, j] = (alpha * new_image[i, j] + (1 - alpha) * np.array(color)).astype(np.uint8)

        cv2.putText(
            new_image, phrase, (x1, y1 - text_offset_original),
            cv2.FONT_HERSHEY_SIMPLEX, text_size, (0, 0, 0), text_line, cv2.LINE_AA
        )
        previous_locations.append((x1, y1))
        previous_bboxes.append((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2))

    try:
        # Save the annotated image next to the original, with an '_exp' suffix.
        file_key_name = json_obj['key'] + '_exp' + os.path.splitext(file_name)[1]
        output_path = os.path.join(output_folder, file_key_name)
        imshow(new_image, file_name=output_path, caption=caption)
    except Exception:
        return


if __name__ == '__main__':
    jsonl_path = '/tmp/grit_coyo.jsonl'
    output_folder = './output/vis_grit'

    os.makedirs(output_folder, exist_ok=True)

    download_images_from_jsonl(jsonl_path, output_folder)
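# Note: the paths above are placeholders; point jsonl_path at your own JSONL
# export and output_folder at a writable directory, then run the script
# directly (the file name below is illustrative):
#   python vis_grit.py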