PaddleSeg-Matting / matting /transforms.py
marta-0's picture
add files
6da6215
raw
history blame
18.5 kB
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import cv2
import numpy as np
from paddleseg.transforms import functional
from paddleseg.cvlibs import manager
from PIL import Image
@manager.TRANSFORMS.add_component
class Compose:
"""
Do transformation on input data with corresponding pre-processing and augmentation operations.
The shape of input data to all operations is [height, width, channels].
"""
def __init__(self, transforms, to_rgb=True):
if not isinstance(transforms, list):
raise TypeError('The transforms must be a list!')
self.transforms = transforms
self.to_rgb = to_rgb
def __call__(self, data):
"""
Args:
data (dict): The data to transform.
Returns:
dict: Data after transformation
"""
if 'trans_info' not in data:
data['trans_info'] = []
for op in self.transforms:
data = op(data)
if data is None:
return None
data['img'] = np.transpose(data['img'], (2, 0, 1))
for key in data.get('gt_fields', []):
if len(data[key].shape) == 2:
continue
data[key] = np.transpose(data[key], (2, 0, 1))
return data
@manager.TRANSFORMS.add_component
class LoadImages:
def __init__(self, to_rgb=True):
self.to_rgb = to_rgb
def __call__(self, data):
if isinstance(data['img'], str):
data['img'] = cv2.imread(data['img'])
for key in data.get('gt_fields', []):
if isinstance(data[key], str):
data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED)
# if alpha and trimap has 3 channels, extract one.
if key in ['alpha', 'trimap']:
if len(data[key].shape) > 2:
data[key] = data[key][:, :, 0]
if self.to_rgb:
data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB)
for key in data.get('gt_fields', []):
if len(data[key].shape) == 2:
continue
data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB)
return data
@manager.TRANSFORMS.add_component
class Resize:
def __init__(self, target_size=(512, 512)):
if isinstance(target_size, list) or isinstance(target_size, tuple):
if len(target_size) != 2:
raise ValueError(
'`target_size` should include 2 elements, but it is {}'.
format(target_size))
else:
raise TypeError(
"Type of `target_size` is invalid. It should be list or tuple, but it is {}"
.format(type(target_size)))
self.target_size = target_size
def __call__(self, data):
data['trans_info'].append(('resize', data['img'].shape[0:2]))
data['img'] = functional.resize(data['img'], self.target_size)
for key in data.get('gt_fields', []):
data[key] = functional.resize(data[key], self.target_size)
return data
@manager.TRANSFORMS.add_component
class ResizeByLong:
"""
Resize the long side of an image to given size, and then scale the other side proportionally.
Args:
long_size (int): The target size of long side.
"""
def __init__(self, long_size):
self.long_size = long_size
def __call__(self, data):
data['trans_info'].append(('resize', data['img'].shape[0:2]))
data['img'] = functional.resize_long(data['img'], self.long_size)
for key in data.get('gt_fields', []):
data[key] = functional.resize_long(data[key], self.long_size)
return data
@manager.TRANSFORMS.add_component
class ResizeByShort:
"""
Resize the short side of an image to given size, and then scale the other side proportionally.
Args:
short_size (int): The target size of short side.
"""
def __init__(self, short_size):
self.short_size = short_size
def __call__(self, data):
data['trans_info'].append(('resize', data['img'].shape[0:2]))
data['img'] = functional.resize_short(data['img'], self.short_size)
for key in data.get('gt_fields', []):
data[key] = functional.resize_short(data[key], self.short_size)
return data
@manager.TRANSFORMS.add_component
class ResizeToIntMult:
"""
Resize to some int muitple, d.g. 32.
"""
def __init__(self, mult_int=32):
self.mult_int = mult_int
def __call__(self, data):
data['trans_info'].append(('resize', data['img'].shape[0:2]))
h, w = data['img'].shape[0:2]
rw = w - w % 32
rh = h - h % 32
data['img'] = functional.resize(data['img'], (rw, rh))
for key in data.get('gt_fields', []):
data[key] = functional.resize(data[key], (rw, rh))
return data
@manager.TRANSFORMS.add_component
class Normalize:
"""
Normalize an image.
Args:
mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5].
std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5].
Raises:
ValueError: When mean/std is not list or any value in std is 0.
"""
def __init__(self, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
self.mean = mean
self.std = std
if not (isinstance(self.mean, (list, tuple))
and isinstance(self.std, (list, tuple))):
raise ValueError(
"{}: input type is invalid. It should be list or tuple".format(
self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, data):
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
data['img'] = functional.normalize(data['img'], mean, std)
if 'fg' in data.get('gt_fields', []):
data['fg'] = functional.normalize(data['fg'], mean, std)
if 'bg' in data.get('gt_fields', []):
data['bg'] = functional.normalize(data['bg'], mean, std)
return data
@manager.TRANSFORMS.add_component
class RandomCropByAlpha:
"""
Randomly crop while centered on uncertain area by a certain probability.
Args:
crop_size (tuple|list): The size you want to crop from image.
p (float): The probability centered on uncertain area.
"""
def __init__(self, crop_size=((320, 320), (480, 480), (640, 640)),
prob=0.5):
self.crop_size = crop_size
self.prob = prob
def __call__(self, data):
idex = np.random.randint(low=0, high=len(self.crop_size))
crop_w, crop_h = self.crop_size[idex]
img_h = data['img'].shape[0]
img_w = data['img'].shape[1]
if np.random.rand() < self.prob:
crop_center = np.where((data['alpha'] > 0) & (data['alpha'] < 255))
center_h_array, center_w_array = crop_center
if len(center_h_array) == 0:
return data
rand_ind = np.random.randint(len(center_h_array))
center_h = center_h_array[rand_ind]
center_w = center_w_array[rand_ind]
delta_h = crop_h // 2
delta_w = crop_w // 2
start_h = max(0, center_h - delta_h)
start_w = max(0, center_w - delta_w)
else:
start_h = 0
start_w = 0
if img_h > crop_h:
start_h = np.random.randint(img_h - crop_h + 1)
if img_w > crop_w:
start_w = np.random.randint(img_w - crop_w + 1)
end_h = min(img_h, start_h + crop_h)
end_w = min(img_w, start_w + crop_w)
data['img'] = data['img'][start_h:end_h, start_w:end_w]
for key in data.get('gt_fields', []):
data[key] = data[key][start_h:end_h, start_w:end_w]
return data
@manager.TRANSFORMS.add_component
class RandomCrop:
"""
Randomly crop
Args:
crop_size (tuple|list): The size you want to crop from image.
"""
def __init__(self, crop_size=((320, 320), (480, 480), (640, 640))):
if not isinstance(crop_size[0], (list, tuple)):
crop_size = [crop_size]
self.crop_size = crop_size
def __call__(self, data):
idex = np.random.randint(low=0, high=len(self.crop_size))
crop_w, crop_h = self.crop_size[idex]
img_h, img_w = data['img'].shape[0:2]
start_h = 0
start_w = 0
if img_h > crop_h:
start_h = np.random.randint(img_h - crop_h + 1)
if img_w > crop_w:
start_w = np.random.randint(img_w - crop_w + 1)
end_h = min(img_h, start_h + crop_h)
end_w = min(img_w, start_w + crop_w)
data['img'] = data['img'][start_h:end_h, start_w:end_w]
for key in data.get('gt_fields', []):
data[key] = data[key][start_h:end_h, start_w:end_w]
return data
@manager.TRANSFORMS.add_component
class LimitLong:
"""
Limit the long edge of image.
If the long edge is larger than max_long, resize the long edge
to max_long, while scale the short edge proportionally.
If the long edge is smaller than min_long, resize the long edge
to min_long, while scale the short edge proportionally.
Args:
max_long (int, optional): If the long edge of image is larger than max_long,
it will be resize to max_long. Default: None.
min_long (int, optional): If the long edge of image is smaller than min_long,
it will be resize to min_long. Default: None.
"""
def __init__(self, max_long=None, min_long=None):
if max_long is not None:
if not isinstance(max_long, int):
raise TypeError(
"Type of `max_long` is invalid. It should be int, but it is {}"
.format(type(max_long)))
if min_long is not None:
if not isinstance(min_long, int):
raise TypeError(
"Type of `min_long` is invalid. It should be int, but it is {}"
.format(type(min_long)))
if (max_long is not None) and (min_long is not None):
if min_long > max_long:
raise ValueError(
'`max_long should not smaller than min_long, but they are {} and {}'
.format(max_long, min_long))
self.max_long = max_long
self.min_long = min_long
def __call__(self, data):
h, w = data['img'].shape[:2]
long_edge = max(h, w)
target = long_edge
if (self.max_long is not None) and (long_edge > self.max_long):
target = self.max_long
elif (self.min_long is not None) and (long_edge < self.min_long):
target = self.min_long
if target != long_edge:
data['trans_info'].append(('resize', data['img'].shape[0:2]))
data['img'] = functional.resize_long(data['img'], target)
for key in data.get('gt_fields', []):
data[key] = functional.resize_long(data[key], target)
return data
@manager.TRANSFORMS.add_component
class RandomHorizontalFlip:
"""
Flip an image horizontally with a certain probability.
Args:
prob (float, optional): A probability of horizontally flipping. Default: 0.5.
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, data):
if random.random() < self.prob:
data['img'] = functional.horizontal_flip(data['img'])
for key in data.get('gt_fields', []):
data[key] = functional.horizontal_flip(data[key])
return data
@manager.TRANSFORMS.add_component
class RandomBlur:
"""
Blurring an image by a Gaussian function with a certain probability.
Args:
prob (float, optional): A probability of blurring an image. Default: 0.1.
"""
def __init__(self, prob=0.1):
self.prob = prob
def __call__(self, data):
if self.prob <= 0:
n = 0
elif self.prob >= 1:
n = 1
else:
n = int(1.0 / self.prob)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
data['img'] = cv2.GaussianBlur(data['img'], (radius, radius), 0,
0)
for key in data.get('gt_fields', []):
data[key] = cv2.GaussianBlur(data[key], (radius, radius), 0,
0)
return data
@manager.TRANSFORMS.add_component
class RandomDistort:
"""
Distort an image with random configurations.
Args:
brightness_range (float, optional): A range of brightness. Default: 0.5.
brightness_prob (float, optional): A probability of adjusting brightness. Default: 0.5.
contrast_range (float, optional): A range of contrast. Default: 0.5.
contrast_prob (float, optional): A probability of adjusting contrast. Default: 0.5.
saturation_range (float, optional): A range of saturation. Default: 0.5.
saturation_prob (float, optional): A probability of adjusting saturation. Default: 0.5.
hue_range (int, optional): A range of hue. Default: 18.
hue_prob (float, optional): A probability of adjusting hue. Default: 0.5.
"""
def __init__(self,
brightness_range=0.5,
brightness_prob=0.5,
contrast_range=0.5,
contrast_prob=0.5,
saturation_range=0.5,
saturation_prob=0.5,
hue_range=18,
hue_prob=0.5):
self.brightness_range = brightness_range
self.brightness_prob = brightness_prob
self.contrast_range = contrast_range
self.contrast_prob = contrast_prob
self.saturation_range = saturation_range
self.saturation_prob = saturation_prob
self.hue_range = hue_range
self.hue_prob = hue_prob
def __call__(self, data):
brightness_lower = 1 - self.brightness_range
brightness_upper = 1 + self.brightness_range
contrast_lower = 1 - self.contrast_range
contrast_upper = 1 + self.contrast_range
saturation_lower = 1 - self.saturation_range
saturation_upper = 1 + self.saturation_range
hue_lower = -self.hue_range
hue_upper = self.hue_range
ops = [
functional.brightness, functional.contrast, functional.saturation,
functional.hue
]
random.shuffle(ops)
params_dict = {
'brightness': {
'brightness_lower': brightness_lower,
'brightness_upper': brightness_upper
},
'contrast': {
'contrast_lower': contrast_lower,
'contrast_upper': contrast_upper
},
'saturation': {
'saturation_lower': saturation_lower,
'saturation_upper': saturation_upper
},
'hue': {
'hue_lower': hue_lower,
'hue_upper': hue_upper
}
}
prob_dict = {
'brightness': self.brightness_prob,
'contrast': self.contrast_prob,
'saturation': self.saturation_prob,
'hue': self.hue_prob
}
im = data['img'].astype('uint8')
im = Image.fromarray(im)
for id in range(len(ops)):
params = params_dict[ops[id].__name__]
params['im'] = im
prob = prob_dict[ops[id].__name__]
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
data['img'] = np.asarray(im)
for key in data.get('gt_fields', []):
if key in ['alpha', 'trimap']:
continue
else:
im = data[key].astype('uint8')
im = Image.fromarray(im)
for id in range(len(ops)):
params = params_dict[ops[id].__name__]
params['im'] = im
prob = prob_dict[ops[id].__name__]
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
data[key] = np.asarray(im)
return data
if __name__ == "__main__":
transforms = [RandomDistort()]
transforms = Compose(transforms)
fg_path = '/ssd1/home/chenguowei01/github/PaddleSeg/contrib/matting/data/matting/human_matting/Distinctions-646/train/fg/13(2).png'
alpha_path = fg_path.replace('fg', 'alpha')
bg_path = '/ssd1/home/chenguowei01/github/PaddleSeg/contrib/matting/data/matting/human_matting/bg/unsplash_bg/attic/photo-1443884590026-2e4d21aee71c?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMjA3fDB8MXxzZWFyY2h8Nzh8fGF0dGljfGVufDB8fHx8MTYyOTY4MDcxNQ&ixlib=rb-1.2.1&q=80&w=400.jpg'
data = {}
data['fg'] = cv2.imread(fg_path)
data['bg'] = cv2.imread(bg_path)
h, w, c = data['fg'].shape
data['bg'] = cv2.resize(data['bg'], (w, h))
alpha = cv2.imread(alpha_path)
data['alpha'] = alpha[:, :, 0]
alpha = alpha / 255.
data['img'] = alpha * data['fg'] + (1 - alpha) * data['bg']
data['gt_fields'] = ['fg', 'bg']
print(data['img'].shape)
for key in data['gt_fields']:
print(data[key].shape)
# import pdb
# pdb.set_trace()
data = transforms(data)
print(data['img'].dtype, data['img'].shape)
cv2.imwrite('distort_img.jpg', data['img'].transpose([1, 2, 0]))