# yolov5anime / detect.py
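"""Detect with YOLOv5 (anime-trained weights), then crop a padded square region
around the first detection in each image, resize it to 300x300 and save it."""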
import argparse
import time
from pathlib import Path
from PIL import Image
import torch
from models.experimental import attempt_load
from utils.datasets import LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect():
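    """Run inference on every image in opt.source and hand the first detection
    (or the whole image when nothing is detected) to process() for cropping."""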
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz)

    # Get class names
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once to warm up
    for path, img, im0s, vid_cap in dataset:
        if img is None:
            print("Image not found:", path)
            continue
        try:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    # Write results
                    for *xyxy, _, _ in det:
                        box = (int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]))
                        process(p, save_path, box)
                        break  # only crop the first detection per image
                else:
                    process(p, save_path)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            print("Error processing file", path, "-", e)

    print('Done. (%.3fs)' % (time.time() - t0))
def process(in_file, out_file, box=None):
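    """Crop a square region around `box` (padded by 25 px and clamped to the image),
    resize it to 300x300 and save it to `out_file`. With no box, the full image is used."""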
    img = Image.open(in_file)
    if box is None:
        box = [0, 0, img.size[0], img.size[1]]  # default to the full image (width, height)
    img_pad = 25

    # pad the box by img_pad pixels on every side
    box_l = int(box[0]) - img_pad
    box_t = int(box[1]) - img_pad
    box_r = int(box[2]) + img_pad
    box_b = int(box[3]) + img_pad

    # clamp box coordinates to the image bounds
    box_l = max(0, box_l)
    box_t = max(0, box_t)
    box_r = min(img.size[0], box_r)
    box_b = min(img.size[1], box_b)

    # calculate box width and height
    box_w = int(box_r - box_l)
    box_h = int(box_b - box_t)
    print("image size", img.size)
    print("original box", (box_l, box_t, box_r, box_b))
    print("original box size", box_w, "x", box_h)

    # find the smaller dimension
    box_d = min(box_w, box_h)

    # adjust box coordinates to a centred square of side box_d
    box_l = int(box_l + (box_w - box_d) / 2)
    box_t = int(box_t + (box_h - box_d) / 2)
    box_r = int(box_l + box_d)
    box_b = int(box_t + box_d)
    box_w = int(box_r - box_l)
    box_h = int(box_b - box_t)
    print("adjusted box", (box_l, box_t, box_r, box_b))
    print("adjusted size", box_w, "x", box_h)

    # crop the square region and resize to 300x300 (Image.Resampling requires Pillow >= 9.1)
    im_new = img.crop((box_l, box_t, box_r, box_b)).resize((300, 300), Image.Resampling.LANCZOS)
    im_new.save(out_file)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5x_anime.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, help='source', required=True)  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, help='output folder', required=True)  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
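
# Example invocation (the source/output paths below are only illustrative):
#   python detect.py --source ./images --output ./crops --weights weights/yolov5x_anime.pt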