# yolov5anime / detect.py
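"""Detect with YOLOv5 (anime-trained weights), then crop a padded square region
around the first detection in each image, resize it to 300x300 and save it."""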
import argparse
import time
from pathlib import Path
from PIL import Image
import torch
from models.experimental import attempt_load
from utils.datasets import LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, plot_one_box, strip_optimizer)
from utils.torch_utils import select_device, load_classifier, time_synchronized
def detect():
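    """Run inference on every image in opt.source and hand the first detection
    (or the whole image when nothing is detected) to process() for cropping."""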
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz)

    # Get class names
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once to warm up
    for path, img, im0s, vid_cap in dataset:
        if img is None:
            print("Image not found:", path)
            continue
        try:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    # Write results
                    for *xyxy, _, _ in det:
                        box = (int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]))
                        process(p, save_path, box)
                        break  # only crop the first detection per image
                else:
                    process(p, save_path)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            print("Error processing file", path, "-", e)

    print('Done. (%.3fs)' % (time.time() - t0))
def process(in_file, out_file, box=None):
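    """Crop a square region around `box` (padded by 25 px and clamped to the image),
    resize it to 300x300 and save it to `out_file`. With no box, the full image is used."""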
    img = Image.open(in_file)
    if box is None:
        box = [0, 0, img.size[0], img.size[1]]  # default to the full image (width, height)
    img_pad = 25

    # pad the box by img_pad pixels on every side
    box_l = int(box[0]) - img_pad
    box_t = int(box[1]) - img_pad
    box_r = int(box[2]) + img_pad
    box_b = int(box[3]) + img_pad

    # clamp box coordinates to the image bounds
    box_l = max(0, box_l)
    box_t = max(0, box_t)
    box_r = min(img.size[0], box_r)
    box_b = min(img.size[1], box_b)

    # calculate box width and height
    box_w = int(box_r - box_l)
    box_h = int(box_b - box_t)
    print("image size", img.size)
    print("original box", (box_l, box_t, box_r, box_b))
    print("original box size", box_w, "x", box_h)

    # find the smaller dimension
    box_d = min(box_w, box_h)

    # adjust box coordinates to a centred square of side box_d
    box_l = int(box_l + (box_w - box_d) / 2)
    box_t = int(box_t + (box_h - box_d) / 2)
    box_r = int(box_l + box_d)
    box_b = int(box_t + box_d)
    box_w = int(box_r - box_l)
    box_h = int(box_b - box_t)
    print("adjusted box", (box_l, box_t, box_r, box_b))
    print("adjusted size", box_w, "x", box_h)

    # crop the square region and resize to 300x300 (Image.Resampling requires Pillow >= 9.1)
    im_new = img.crop((box_l, box_t, box_r, box_b)).resize((300, 300), Image.Resampling.LANCZOS)
    im_new.save(out_file)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='weights/yolov5x_anime.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, help='source', required=True)  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, help='output folder', required=True)  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
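
# Example invocation (the source/output paths below are only illustrative):
#   python detect.py --source ./images --output ./crops --weights weights/yolov5x_anime.pt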