update demo

This view is limited to 50 files because it contains too many changes.
- .gitattributes +2 -0
- .gitmodules +3 -0
- app.py +21 -62
- configs/anonymizers/FB_cse.py +0 -28
- configs/anonymizers/FB_cse_mask.py +0 -29
- configs/anonymizers/FB_cse_mask_face.py +0 -29
- configs/anonymizers/face.py +0 -18
- configs/anonymizers/market1501/blackout.py +0 -8
- configs/anonymizers/market1501/person.py +0 -6
- configs/anonymizers/market1501/pixelation16.py +0 -8
- configs/anonymizers/market1501/pixelation8.py +0 -8
- configs/datasets/coco_cse.py +0 -69
- configs/datasets/fdf128.py +0 -24
- configs/datasets/fdf256.py +0 -69
- configs/datasets/fdh.py +0 -89
- configs/datasets/utils.py +0 -12
- configs/defaults.py +0 -45
- configs/discriminators/sg2_discriminator.py +0 -42
- configs/fdf/stylegan.py +0 -14
- configs/fdf/stylegan_fdf128.py +0 -13
- configs/fdh/styleganL.py +0 -16
- configs/fdh/styleganL_nocse.py +0 -14
- configs/generators/stylegan_unet.py +0 -22
- deep_privacy2 +1 -0
- dp2/__init__.py +0 -0
- dp2/anonymizer/__init__.py +0 -1
- dp2/anonymizer/anonymizer.py +0 -159
- dp2/data/__init__.py +0 -0
- dp2/data/build.py +0 -148
- dp2/data/datasets/__init__.py +0 -0
- dp2/data/datasets/coco_cse.py +0 -148
- dp2/data/datasets/fdf.py +0 -129
- dp2/data/datasets/fdh.py +0 -104
- dp2/data/transforms/__init__.py +0 -2
- dp2/data/transforms/functional.py +0 -61
- dp2/data/transforms/stylegan2_transform.py +0 -394
- dp2/data/transforms/transforms.py +0 -247
- dp2/data/utils.py +0 -102
- dp2/detection/__init__.py +0 -3
- dp2/detection/base.py +0 -45
- dp2/detection/box_utils.py +0 -104
- dp2/detection/box_utils_fdf.py +0 -203
- dp2/detection/cse_mask_face_detector.py +0 -116
- dp2/detection/face_detector.py +0 -62
- dp2/detection/models/__init__.py +0 -0
- dp2/detection/models/cse.py +0 -135
- dp2/detection/models/keypoint_maskrcnn.py +0 -111
- dp2/detection/models/mask_rcnn.py +0 -78
- dp2/detection/person_detector.py +0 -135
- dp2/detection/structures.py +0 -464
.gitattributes
CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 erling.jpg filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text
+torch_home/hub/checkpoints/WIDERFace_DSFD_RES152.pth filter=lfs diff=lfs merge=lfs -text
+torch_home/hub/checkpoints/89660f04-5c11-4dbf-adac-cbe2f11b0aeea25cbf78-7558-475a-b3c7-03f5c10b7934646b0720-ca0a-4d53-aded-daddbfa45c9e filter=lfs diff=lfs merge=lfs -text
.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "deep_privacy2"]
+path = deep_privacy2
+url = https://github.com/hukkelas/deep_privacy2
app.py
CHANGED
@@ -1,78 +1,37 @@
+import gradio
+import sys
 import os
+from pathlib import Path
+from tops.config import instantiate
+import gradio.inputs
 os.system("pip install --upgrade pip")
 os.system("pip install ftfy regex tqdm")
-os.system("pip install git+https://github.com/openai/CLIP.git")
+os.system("pip install --no-deps git+https://github.com/openai/CLIP.git")
 os.system("pip install git+https://github.com/facebookresearch/detectron2@96c752ce821a3340e27edd51c28a00665dd32a30#subdirectory=projects/DensePose")
-os.system("pip install git+https://github.com/hukkelas/DSFD-Pytorch-Inference")
-
-
-import torch
-from PIL import Image
+os.system("pip install --no-deps git+https://github.com/hukkelas/DSFD-Pytorch-Inference")
+sys.path.insert(0, Path(os.getcwd(), "deep_privacy2"))
+os.environ["TORCH_HOME"] = "torch_home"
 from dp2 import utils
-from tops.config import instantiate
-import tops
-import gradio.inputs
-
-
-cfg_body = utils.load_config("configs/anonymizers/FB_cse.py")
-anonymizer_body = instantiate(cfg_body.anonymizer, load_cache=False)
-anonymizer_body.initialize_tracker(fps=1)
-cfg_face = utils.load_config("configs/anonymizers/face.py")
-anonymizer_face = instantiate(cfg_face.anonymizer, load_cache=False)
-anonymizer_face.initialize_tracker(fps=1)
-
-
-class ExampleDemo:
+from gradio_demos.modules import ExampleDemo, WebcamDemo
 
-
-
-
-
-        input_image = gradio.Image(type="pil", label="Upload your image or try the example below!")
-        output_image = gradio.Image(type="numpy", label="Output")
-        with gradio.Row():
-            update_btn = gradio.Button("Update Anonymization").style(full_width=True)
-            visualize_det = gradio.Checkbox(value=False, label="Show Detections")
-        visualize_det.change(self.anonymize, inputs=[input_image, visualize_det], outputs=[output_image])
-        gradio.Examples(
-            ["erling.jpg", "g7-summit-leaders-distraction.jpg"], inputs=[input_image]
-        )
-        update_btn.click(self.anonymize, inputs=[input_image, visualize_det], outputs=[output_image])
-        input_image.change(self.anonymize, inputs=[input_image, visualize_det], outputs=[output_image])
-        self.track = False
+cfg_face = utils.load_config("deep_privacy2/configs/anonymizers/face.py")
+for key in ["person_G_cfg", "cse_person_G_cfg", "face_G_cfg", "car_G_cfg"]:
+    if key in cfg_face.anonymizer:
+        cfg_face.anonymizer[key] = Path("deep_privacy2", cfg_face.anonymizer[key])
 
-    def anonymize(self, img: Image, visualize_detection: bool):
-        img, cache_id = pil2torch(img)
-        img = tops.to_cuda(img)
-        if visualize_detection:
-            img = self.anonymizer.visualize_detection(img, cache_id=cache_id)
-        else:
-            img = self.anonymizer(
-                img, truncation_value=0 if self.multi_modal_truncation else 1, multi_modal_truncation=self.multi_modal_truncation, amp=True,
-                cache_id=cache_id, track=self.track)
-        img = utils.im2numpy(img)
-        return img
 
+anonymizer_face = instantiate(cfg_face.anonymizer, load_cache=False)
 
-
-    img = img.convert("RGB")
-    img = np.array(img)
-    img = np.rollaxis(img, 2)
-    return torch.from_numpy(img), None
+anonymizer_face.initialize_tracker(fps=1)
 
 
 with gradio.Blocks() as demo:
     gradio.Markdown("# <center> DeepPrivacy2 - Realistic Image Anonymization </center>")
     gradio.Markdown("### <center> Håkon Hukkelås, Rudolf Mester, Frank Lindseth </center>")
-    gradio.Markdown("<center> DeepPrivacy2 is a toolbox for realistic anonymization of humans, including a face and a full-body anonymizer. </center>")
     gradio.Markdown("<center> See more information at: <a href='https://github.com/hukkelas/deep_privacy2'> https://github.com/hukkelas/deep_privacy2 </a> </center>")
+    with gradio.Tab("Face Anonymization"):
+        ExampleDemo(anonymizer_face)
+    with gradio.Tab("Live Webcam"):
+        WebcamDemo(anonymizer_face)
 
-
-
-    with gradio.Tab("Full-Body Anonymization"):
-        ExampleDemo(anonymizer_body, multi_modal_truncation=True)
-    with gradio.Tab("Face Anonymization"):
-        ExampleDemo(anonymizer_face, multi_modal_truncation=False)
-
-
-demo.launch()
+demo.launch()
configs/anonymizers/FB_cse.py
DELETED
@@ -1,28 +0,0 @@
-from dp2.anonymizer import Anonymizer
-from dp2.detection.person_detector import CSEPersonDetector
-from ..defaults import common
-from tops.config import LazyCall as L
-from dp2.generator.dummy_generators import MaskOutGenerator
-
-
-maskout_G = L(MaskOutGenerator)(noise="constant")
-
-detector = L(CSEPersonDetector)(
-    mask_rcnn_cfg=dict(),
-    cse_cfg=dict(),
-    cse_post_process_cfg=dict(
-        target_imsize=(288, 160),
-        exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
-        exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
-        iou_combine_threshold=0.4,
-        dilation_percentage=0.02,
-        normalize_embedding=False
-    ),
-    score_threshold=0.3,
-    cache_directory=common.output_dir.joinpath("cse_person_detection_cache")
-)
-
-anonymizer = L(Anonymizer)(
-    detector="${detector}",
-    cse_person_G_cfg="configs/fdh/styleganL.py",
-)
configs/anonymizers/FB_cse_mask.py
DELETED
@@ -1,29 +0,0 @@
-from dp2.anonymizer import Anonymizer
-from dp2.detection.person_detector import CSEPersonDetector
-from ..defaults import common
-from tops.config import LazyCall as L
-from dp2.generator.dummy_generators import MaskOutGenerator
-
-
-maskout_G = L(MaskOutGenerator)(noise="constant")
-
-detector = L(CSEPersonDetector)(
-    mask_rcnn_cfg=dict(),
-    cse_cfg=dict(),
-    cse_post_process_cfg=dict(
-        target_imsize=(288, 160),
-        exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
-        exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
-        iou_combine_threshold=0.4,
-        dilation_percentage=0.02,
-        normalize_embedding=False
-    ),
-    score_threshold=0.3,
-    cache_directory=common.output_dir.joinpath("cse_person_detection_cache")
-)
-
-anonymizer = L(Anonymizer)(
-    detector="${detector}",
-    person_G_cfg="configs/fdh/styleganL_nocse.py",
-    cse_person_G_cfg="configs/fdh/styleganL.py",
-)
configs/anonymizers/FB_cse_mask_face.py
DELETED
@@ -1,29 +0,0 @@
-from dp2.anonymizer import Anonymizer
-from dp2.detection.cse_mask_face_detector import CSeMaskFaceDetector
-from ..defaults import common
-from tops.config import LazyCall as L
-
-detector = L(CSeMaskFaceDetector)(
-    mask_rcnn_cfg=dict(),
-    face_detector_cfg=dict(),
-    face_post_process_cfg=dict(target_imsize=(256, 256)),
-    cse_cfg=dict(),
-    cse_post_process_cfg=dict(
-        target_imsize=(288, 160),
-        exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=.1),
-        exp_bbox_filter=dict(minimum_area=32*32, min_bbox_ratio_inside=0, aspect_ratio_range=[0, 99999]),
-        iou_combine_threshold=0.4,
-        dilation_percentage=0.02,
-        normalize_embedding=False
-    ),
-    score_threshold=0.3,
-    cache_directory=common.output_dir.joinpath("cse_mask_face_detection_cache")
-)
-
-anonymizer = L(Anonymizer)(
-    detector="${detector}",
-    face_G_cfg="configs/fdf/stylegan.py",
-    person_G_cfg="configs/fdh/styleganL_nocse.py",
-    cse_person_G_cfg="configs/fdh/styleganL.py",
-    car_G_cfg="configs/generators/dummy/pixelation8.py"
-)
configs/anonymizers/face.py
DELETED
@@ -1,18 +0,0 @@
-from dp2.anonymizer import Anonymizer
-from dp2.detection.face_detector import FaceDetector
-from ..defaults import common
-from tops.config import LazyCall as L
-
-
-detector = L(FaceDetector)(
-    face_detector_cfg=dict(name="DSFDDetector", clip_boxes=True),
-    face_post_process_cfg=dict(target_imsize=(256, 256), fdf128_expand=False),
-    score_threshold=0.3,
-    cache_directory=common.output_dir.joinpath("face_detection_cache")
-)
-
-
-anonymizer = L(Anonymizer)(
-    detector="${detector}",
-    face_G_cfg="configs/fdf/stylegan.py",
-)
configs/anonymizers/market1501/blackout.py
DELETED
@@ -1,8 +0,0 @@
-from ..FB_cse_mask_face import anonymizer, detector, common
-
-detector.score_threshold = .1
-detector.face_detector_cfg.confidence_threshold = .5
-detector.cse_cfg.score_thres = 0.3
-anonymizer.generators.face_G_cfg = None
-anonymizer.generators.person_G_cfg = "configs/generators/dummy/maskout.py"
-anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/maskout.py"
configs/anonymizers/market1501/person.py
DELETED
@@ -1,6 +0,0 @@
-from ..FB_cse_mask_face import anonymizer, detector, common
-
-detector.score_threshold = .1
-detector.face_detector_cfg.confidence_threshold = .5
-detector.cse_cfg.score_thres = 0.3
-anonymizer.generators.face_G_cfg = None
configs/anonymizers/market1501/pixelation16.py
DELETED
@@ -1,8 +0,0 @@
-from ..FB_cse_mask_face import anonymizer, detector, common
-
-detector.score_threshold = .1
-detector.face_detector_cfg.confidence_threshold = .5
-detector.cse_cfg.score_thres = 0.3
-anonymizer.generators.face_G_cfg = None
-anonymizer.generators.person_G_cfg = "configs/generators/dummy/pixelation16.py"
-anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/pixelation16.py"
configs/anonymizers/market1501/pixelation8.py
DELETED
@@ -1,8 +0,0 @@
-from ..FB_cse_mask_face import anonymizer, detector, common
-
-detector.score_threshold = .1
-detector.face_detector_cfg.confidence_threshold = .5
-detector.cse_cfg.score_thres = 0.3
-anonymizer.generators.face_G_cfg = None
-anonymizer.generators.person_G_cfg = "configs/generators/dummy/pixelation8.py"
-anonymizer.generators.cse_person_G_cfg = "configs/generators/dummy/pixelation8.py"
configs/datasets/coco_cse.py
DELETED
@@ -1,69 +0,0 @@
-import os
-from pathlib import Path
-from tops.config import LazyCall as L
-import torch
-import functools
-from dp2.data.datasets import CocoCSE
-from dp2.data.build import get_dataloader
-from dp2.data.transforms.transforms import CreateEmbedding, Normalize, Resize, ToFloat, CreateCondition, RandomHorizontalFlip
-from dp2.data.transforms.stylegan2_transform import StyleGANAugmentPipe
-from dp2.metrics.torch_metrics import compute_metrics_iteratively
-from .utils import final_eval_fn
-
-
-dataset_base_dir = os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
-metrics_cache = os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
-data_dir = Path(dataset_base_dir, "coco_cse")
-data = dict(
-    imsize=(288, 160),
-    im_channels=3,
-    semantic_nc=26,
-    cse_nc=16,
-    train=dict(
-        dataset=L(CocoCSE)(data_dir.joinpath("train"), transform=None, normalize_E=False),
-        loader=L(get_dataloader)(
-            shuffle=True, num_workers=6, drop_last=True, prefetch_factor=2,
-            batch_size="${train.batch_size}",
-            dataset="${..dataset}",
-            infinite=True,
-            gpu_transform=L(torch.nn.Sequential)(*[
-                L(ToFloat)(),
-                L(StyleGANAugmentPipe)(
-                    rotate=0.5, rotate_max=.05,
-                    xint=.5, xint_max=0.05,
-                    scale=.5, scale_std=.05,
-                    aniso=0.5, aniso_std=.05,
-                    xfrac=.5, xfrac_std=.05,
-                    brightness=.5, brightness_std=.05,
-                    contrast=.5, contrast_std=.1,
-                    hue=.5, hue_max=.05,
-                    saturation=.5, saturation_std=.5,
-                    imgfilter=.5, imgfilter_std=.1),
-                L(RandomHorizontalFlip)(p=0.5),
-                L(CreateEmbedding)(),
-                L(Resize)(size="${data.imsize}"),
-                L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
-                L(CreateCondition)(),
-            ])
-        )
-    ),
-    val=dict(
-        dataset=L(CocoCSE)(data_dir.joinpath("val"), transform=None, normalize_E=False),
-        loader=L(get_dataloader)(
-            shuffle=False, num_workers=6, drop_last=True, prefetch_factor=2,
-            batch_size="${train.batch_size}",
-            dataset="${..dataset}",
-            infinite=False,
-            gpu_transform=L(torch.nn.Sequential)(*[
-                L(ToFloat)(),
-                L(CreateEmbedding)(),
-                L(Resize)(size="${data.imsize}"),
-                L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
-                L(CreateCondition)(),
-            ])
-        )
-    ),
-    # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
-    train_evaluation_fn=functools.partial(compute_metrics_iteratively, cache_directory=Path(metrics_cache, "coco_cse_val"), include_two_fake=False),
-    evaluation_fn=functools.partial(final_eval_fn, cache_directory=Path(metrics_cache, "coco_cse_val_final"), include_two_fake=True)
-)
configs/datasets/fdf128.py
DELETED
@@ -1,24 +0,0 @@
-from pathlib import Path
-from functools import partial
-from dp2.data.datasets.fdf import FDFDataset
-from .fdf256 import data, dataset_base_dir, metrics_cache, final_eval_fn
-
-data_dir = Path(dataset_base_dir, "fdf")
-data.train.dataset.dirpath = data_dir.joinpath("train")
-data.val.dataset.dirpath = data_dir.joinpath("val")
-data.imsize = (128, 128)
-
-
-data.train_evaluation_fn = partial(
-    final_eval_fn, cache_directory=Path(metrics_cache, "fdf128_val_train"))
-data.evaluation_fn = partial(
-    final_eval_fn, cache_directory=Path(metrics_cache, "fdf128_val_final"))
-
-data.train.dataset.update(
-    _target_ = FDFDataset,
-    imsize="${data.imsize}"
-)
-data.val.dataset.update(
-    _target_ = FDFDataset,
-    imsize="${data.imsize}"
-)
configs/datasets/fdf256.py
DELETED
@@ -1,69 +0,0 @@
-import os
-from pathlib import Path
-from tops.config import LazyCall as L
-import torch
-import functools
-from dp2.data.datasets.fdf import FDF256Dataset
-from dp2.data.build import get_dataloader
-from dp2.data.transforms.transforms import Normalize, Resize, ToFloat, CreateCondition, RandomHorizontalFlip
-from dp2.metrics.torch_metrics import compute_metrics_iteratively
-from dp2.metrics.fid_clip import compute_fid_clip
-from dp2.metrics.ppl import calculate_ppl
-from .utils import final_eval_fn
-
-
-def final_eval_fn(*args, **kwargs):
-    result = compute_metrics_iteratively(*args, **kwargs)
-    result2 = compute_fid_clip(*args, **kwargs)
-    assert all(key not in result for key in result2)
-    result.update(result2)
-    result3 = calculate_ppl(*args, **kwargs,)
-    assert all(key not in result for key in result3)
-    result.update(result3)
-    return result
-
-
-dataset_base_dir = os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
-metrics_cache = os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
-data_dir = Path(dataset_base_dir, "fdf256")
-data = dict(
-    imsize=(256, 256),
-    im_channels=3,
-    semantic_nc=None,
-    cse_nc=None,
-    n_keypoints=None,
-    train=dict(
-        dataset=L(FDF256Dataset)(dirpath=data_dir.joinpath("train"), transform=None, load_keypoints=False),
-        loader=L(get_dataloader)(
-            shuffle=True, num_workers=3, drop_last=True, prefetch_factor=2,
-            batch_size="${train.batch_size}",
-            dataset="${..dataset}",
-            infinite=True,
-            gpu_transform=L(torch.nn.Sequential)(*[
-                L(ToFloat)(),
-                L(RandomHorizontalFlip)(p=0.5),
-                L(Resize)(size="${data.imsize}"),
-                L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
-                L(CreateCondition)(),
-            ])
-        )
-    ),
-    val=dict(
-        dataset=L(FDF256Dataset)(dirpath=data_dir.joinpath("val"), transform=None, load_keypoints=False),
-        loader=L(get_dataloader)(
-            shuffle=False, num_workers=3, drop_last=False, prefetch_factor=2,
-            batch_size="${train.batch_size}",
-            dataset="${..dataset}",
-            infinite=False,
-            gpu_transform=L(torch.nn.Sequential)(*[
-                L(ToFloat)(),
-                L(Resize)(size="${data.imsize}"),
-                L(Normalize)(mean=[.5, .5, .5], std=[.5, .5, .5], inplace=True),
-                L(CreateCondition)(),
-            ])
-        )
-    ),
-    # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
-    train_evaluation_fn=functools.partial(compute_metrics_iteratively, cache_directory=Path(metrics_cache, "fdf_val_train")),
-    evaluation_fn=functools.partial(final_eval_fn, cache_directory=Path(metrics_cache, "fdf_val"))
-)
configs/datasets/fdh.py
DELETED
@@ -1,89 +0,0 @@
-import os
-from pathlib import Path
-from tops.config import LazyCall as L
-import torch
-import functools
-from dp2.data.datasets.fdh import get_dataloader_fdh_wds
-from dp2.data.utils import get_coco_flipmap
-from dp2.data.transforms.transforms import (
-    Normalize,
-    ToFloat,
-    CreateCondition,
-    RandomHorizontalFlip,
-    CreateEmbedding,
-)
-from dp2.metrics.torch_metrics import compute_metrics_iteratively
-from dp2.metrics.fid_clip import compute_fid_clip
-from .utils import final_eval_fn
-
-
-def train_eval_fn(*args, **kwargs):
-    result = compute_metrics_iteratively(*args, **kwargs)
-    result2 = compute_fid_clip(*args, **kwargs)
-    assert all(key not in result for key in result2)
-    result.update(result2)
-    return result
-
-
-dataset_base_dir = (
-    os.environ["BASE_DATASET_DIR"] if "BASE_DATASET_DIR" in os.environ else "data"
-)
-metrics_cache = (
-    os.environ["FBA_METRICS_CACHE"] if "FBA_METRICS_CACHE" in os.environ else ".cache"
-)
-data_dir = Path(dataset_base_dir, "fdh")
-data = dict(
-    imsize=(288, 160),
-    im_channels=3,
-    cse_nc=16,
-    n_keypoints=17,
-    train=dict(
-        loader=L(get_dataloader_fdh_wds)(
-            path=data_dir.joinpath("train", "out-{000000..001423}.tar"),
-            batch_size="${train.batch_size}",
-            num_workers=6,
-            transform=L(torch.nn.Sequential)(
-                L(RandomHorizontalFlip)(p=0.5, flip_map=get_coco_flipmap()),
-            ),
-            gpu_transform=L(torch.nn.Sequential)(
-                L(ToFloat)(norm=False, keys=["img", "mask", "E_mask", "maskrcnn_mask"]),
-                L(CreateEmbedding)(embed_path=data_dir.joinpath("embed_map.torch")),
-                L(Normalize)(mean=[0.5*255, 0.5*255, 0.5*255], std=[0.5*255, 0.5*255, 0.5*255], inplace=True),
-                L(CreateCondition)(),
-            ),
-            infinite=True,
-            shuffle=True,
-            partial_batches=False,
-            load_embedding=True,
-        )
-    ),
-    val=dict(
-        loader=L(get_dataloader_fdh_wds)(
-            path=data_dir.joinpath("val", "out-{000000..000023}.tar"),
-            batch_size="${train.batch_size}",
-            num_workers=6,
-            transform=None,
-            gpu_transform=L(torch.nn.Sequential)(
-                L(ToFloat)(keys=["img", "mask", "E_mask", "maskrcnn_mask"], norm=False),
-                L(CreateEmbedding)(embed_path=data_dir.joinpath("embed_map.torch")),
-                L(Normalize)(mean=[0.5*255, 0.5*255, 0.5*255], std=[0.5*255, 0.5*255, 0.5*255], inplace=True),
-                L(CreateCondition)(),
-            ),
-            infinite=False,
-            shuffle=False,
-            partial_batches=True,
-            load_embedding=True,
-        )
-    ),
-    # Training evaluation might do optimizations to reduce compute overhead. E.g. compute with AMP.
-    train_evaluation_fn=functools.partial(
-        train_eval_fn,
-        cache_directory=Path(metrics_cache, "fdh_v7_train"),
-        data_len=int(30e3),
-    ),
-    evaluation_fn=functools.partial(
-        final_eval_fn,
-        cache_directory=Path(metrics_cache, "fdh_v6_val"),
-        data_len=int(30e3),
-    ),
-)
configs/datasets/utils.py
DELETED
@@ -1,12 +0,0 @@
-from dp2.metrics.ppl import calculate_ppl
-from dp2.metrics.torch_metrics import compute_metrics_iteratively
-from dp2.metrics.fid_clip import compute_fid_clip
-
-
-def final_eval_fn(*args, **kwargs):
-    result = compute_metrics_iteratively(*args, **kwargs)
-    result2 = calculate_ppl(*args, **kwargs,)
-    result2 = compute_fid_clip(*args, **kwargs)
-    assert all(key not in result for key in result2)
-    result.update(result2)
-    return result
configs/defaults.py
DELETED
@@ -1,45 +0,0 @@
-import pathlib
-import os
-import torch
-from tops.config import LazyCall as L
-
-if "PRETRAINED_CHECKPOINTS_PATH" in os.environ:
-    PRETRAINED_CHECKPOINTS_PATH = pathlib.Path(os.environ["PRETRAINED_CHECKPOINTS_PATH"])
-else:
-    PRETRAINED_CHECKPOINTS_PATH = pathlib.Path("pretrained_checkpoints")
-if "BASE_OUTPUT_DIR" in os.environ:
-    BASE_OUTPUT_DIR = pathlib.Path(os.environ["BASE_OUTPUT_DIR"])
-else:
-    BASE_OUTPUT_DIR = pathlib.Path("outputs")
-
-
-
-common = dict(
-    logger_backend=["wandb", "stdout", "json", "image_dumper"],
-    wandb_project="fba_test",
-    output_dir=BASE_OUTPUT_DIR,
-    experiment_name=None,  # Optional experiment name to show on wandb
-)
-
-train = dict(
-    batch_size=32,
-    seed=0,
-    ims_per_log=1024,
-    ims_per_val=int(200e3),
-    max_images_to_train=int(12e6),
-    amp=dict(
-        enabled=True,
-        scaler_D=L(torch.cuda.amp.GradScaler)(init_scale=2**16, growth_factor=4, growth_interval=100, enabled="${..enabled}"),
-        scaler_G=L(torch.cuda.amp.GradScaler)(init_scale=2**16, growth_factor=4, growth_interval=100, enabled="${..enabled}"),
-    ),
-    fp16_ddp_accumulate=False,  # All gather gradients in fp16?
-    broadcast_buffers=False,
-    bias_act_plugin_enabled=True,
-    grid_sample_gradfix_enabled=True,
-    conv2d_gradfix_enabled=False,
-    channels_last=False,
-)
-
-# exponential moving average
-EMA = dict(rampup=0.05)
-
configs/discriminators/sg2_discriminator.py
DELETED
@@ -1,42 +0,0 @@
-from tops.config import LazyCall as L
-from dp2.discriminator import SG2Discriminator
-import torch
-from dp2.loss import StyleGAN2Loss
-
-
-discriminator = L(SG2Discriminator)(
-    imsize="${data.imsize}",
-    im_channels="${data.im_channels}",
-    min_fmap_resolution=4,
-    max_cnum_mul=8,
-    cnum=80,
-    input_condition=True,
-    conv_clamp=256,
-    input_cse=False,
-    cse_nc="${data.cse_nc}"
-)
-
-
-loss_fnc = L(StyleGAN2Loss)(
-    lazy_regularization=True,
-    lazy_reg_interval=16,
-    r1_opts=dict(lambd=5, mask_out=False, mask_out_scale=False),
-    EP_lambd=0.001,
-    pl_reg_opts=dict(weight=0, batch_shrink=2,start_nimg=int(1e6), pl_decay=0.01)
-)
-
-def build_D_optim(type, lr, betas, lazy_regularization, lazy_reg_interval, **kwargs):
-    if lazy_regularization:
-        # From Analyzing and improving the image quality of stylegan, CVPR 2020
-        c = lazy_reg_interval / (lazy_reg_interval + 1)
-        betas = [beta ** c for beta in betas]
-        lr *= c
-        print(f"Lazy regularization on. Setting lr to: {lr}, betas to: {betas}")
-    return type(lr=lr, betas=betas, **kwargs)
-
-
-D_optim = L(build_D_optim)(
-    type=torch.optim.Adam, lr=0.001, betas=(0.0, 0.99),
-    lazy_regularization="${loss_fnc.lazy_regularization}",
-    lazy_reg_interval="${loss_fnc.lazy_reg_interval}")
-G_optim = L(torch.optim.Adam)(lr=0.001, betas=(0.0, 0.99))
configs/fdf/stylegan.py
DELETED
@@ -1,14 +0,0 @@
-from ..generators.stylegan_unet import generator
-from ..datasets.fdf256 import data
-from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
-from ..defaults import train, common, EMA
-
-train.max_images_to_train = int(35e6)
-G_optim.lr = 0.002
-D_optim.lr = 0.002
-generator.input_cse = False
-loss_fnc.r1_opts.lambd = 1
-train.ims_per_val = int(2e6)
-
-common.model_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/89660f04-5c11-4dbf-adac-cbe2f11b0aeea25cbf78-7558-475a-b3c7-03f5c10b7934646b0720-ca0a-4d53-aded-daddbfa45c9e"
-common.model_md5sum = "e8e32190528af2ed75f0cb792b7f2b07"
configs/fdf/stylegan_fdf128.py
DELETED
@@ -1,13 +0,0 @@
-from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
-from ..datasets.fdf128 import data
-from ..generators.stylegan_unet import generator
-from ..defaults import train, common, EMA
-from tops.config import LazyCall as L
-
-train.max_images_to_train = int(25e6)
-G_optim.lr = 0.002
-D_optim.lr = 0.002
-generator.cnum = 128
-generator.max_cnum_mul = 4
-generator.input_cse = False
-loss_fnc.r1_opts.lambd = .1
configs/fdh/styleganL.py
DELETED
@@ -1,16 +0,0 @@
-from tops.config import LazyCall as L
-from ..generators.stylegan_unet import generator
-from ..datasets.fdh import data
-from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
-from ..defaults import train, common, EMA
-
-train.max_images_to_train = int(50e6)
-train.batch_size = 64
-G_optim.lr = 0.002
-D_optim.lr = 0.002
-data.train.loader.num_workers = 4
-train.ims_per_val = int(1e6)
-loss_fnc.r1_opts.lambd = .1
-
-common.model_url = "https://api.loke.aws.unit.no/dlr-gui-backend-resources-content/v2/contents/links/21841da7-2546-4ce3-8460-909b3a63c58b13aac1a1-c778-4c8d-9b69-3e5ed2cde9de1524e76e-7aa6-4dd8-b643-52abc9f0792c"
-common.model_md5sum = "3411478b5ec600a4219cccf4499732bd"
configs/fdh/styleganL_nocse.py
DELETED
@@ -1,14 +0,0 @@
-from tops.config import LazyCall as L
-from ..generators.stylegan_unet import generator
-from ..datasets.fdh import data
-from ..discriminators.sg2_discriminator import discriminator, G_optim, D_optim, loss_fnc
-from ..defaults import train, common, EMA
-
-train.max_images_to_train = int(50e6)
-G_optim.lr = 0.002
-D_optim.lr = 0.002
-generator.input_cse = False
-data.load_embeddings = False
-common.model_url = "https://folk.ntnu.no/haakohu/checkpoints/deep_privacy2/fdh_styleganL_nocse.ckpt"
-common.model_md5sum = "fda0d809741bc67487abada793975c37"
-generator.fix_errors = False
configs/generators/stylegan_unet.py
DELETED
@@ -1,22 +0,0 @@
-from dp2.generator.stylegan_unet import StyleGANUnet
-from tops.config import LazyCall as L
-
-generator = L(StyleGANUnet)(
-    imsize="${data.imsize}",
-    im_channels="${data.im_channels}",
-    min_fmap_resolution=8,
-    cnum=64,
-    max_cnum_mul=8,
-    n_middle_blocks=0,
-    z_channels=512,
-    mask_output=True,
-    conv_clamp=256,
-    input_cse=True,
-    scale_grad=True,
-    cse_nc="${data.cse_nc}",
-    w_dim=512,
-    n_keypoints="${data.n_keypoints}",
-    input_keypoints=False,
-    input_keypoint_indices=[],
-    fix_errors=True
-)
deep_privacy2
ADDED
@@ -0,0 +1 @@
+Subproject commit 37dcbeb23a1f51121d53bcd80d32d086d6822b7b
dp2/__init__.py
DELETED
File without changes
dp2/anonymizer/__init__.py
DELETED
@@ -1 +0,0 @@
-from .anonymizer import Anonymizer
dp2/anonymizer/anonymizer.py
DELETED
@@ -1,159 +0,0 @@
-from pathlib import Path
-from typing import Union, Optional
-import numpy as np
-import torch
-import tops
-import torchvision.transforms.functional as F
-from motpy import Detection, MultiObjectTracker
-from dp2.utils import load_config
-from dp2.infer import build_trained_generator
-from dp2.detection.structures import CSEPersonDetection, FaceDetection, PersonDetection, VehicleDetection
-
-
-def load_generator_from_cfg_path(cfg_path: Union[str, Path]):
-    cfg = load_config(cfg_path)
-    G = build_trained_generator(cfg)
-    tops.logger.log(f"Loaded generator from: {cfg_path}")
-    return G
-
-
-def resize_batch(img, mask, maskrcnn_mask, condition, imsize, **kwargs):
-    img = F.resize(img, imsize, antialias=True)
-    mask = (F.resize(mask, imsize, antialias=True) > 0.99).float()
-    maskrcnn_mask = (F.resize(maskrcnn_mask, imsize, antialias=True) > 0.5).float()
-
-    condition = img * mask
-    return dict(img=img, mask=mask, maskrcnn_mask=maskrcnn_mask, condition=condition)
-
-
-class Anonymizer:
-
-    def __init__(
-            self,
-            detector,
-            load_cache: bool,
-            person_G_cfg: Optional[Union[str, Path]] = None,
-            cse_person_G_cfg: Optional[Union[str, Path]] = None,
-            face_G_cfg: Optional[Union[str, Path]] = None,
-            car_G_cfg: Optional[Union[str, Path]] = None,
-    ) -> None:
-        self.detector = detector
-        self.generators = {k: None for k in [CSEPersonDetection, PersonDetection, FaceDetection, VehicleDetection]}
-        self.load_cache = load_cache
-        if cse_person_G_cfg is not None:
-            self.generators[CSEPersonDetection] = load_generator_from_cfg_path(cse_person_G_cfg)
-        if person_G_cfg is not None:
-            self.generators[PersonDetection] = load_generator_from_cfg_path(person_G_cfg)
-        if face_G_cfg is not None:
-            self.generators[FaceDetection] = load_generator_from_cfg_path(face_G_cfg)
-        if car_G_cfg is not None:
-            self.generators[VehicleDetection] = load_generator_from_cfg_path(car_G_cfg)
-
-    def initialize_tracker(self, fps: float):
-        self.tracker = MultiObjectTracker(dt=1/fps)
-        self.track_to_z_idx = dict()
-        self.cur_z_idx = 0
-
-    @torch.no_grad()
-    def anonymize_detections(self,
-            im, detection, truncation_value: float,
-            multi_modal_truncation: bool, amp: bool, z_idx,
-            all_styles=None,
-            update_identity=None,
-            ):
-        G = self.generators[type(detection)]
-        if G is None:
-            return im
-        C, H, W = im.shape
-        orig_im = im.clone()
-        if update_identity is None:
-            update_identity = [True for i in range(len(detection))]
-        for idx in range(len(detection)):
-            if not update_identity[idx]:
-                continue
-            batch = detection.get_crop(idx, im)
-            x0, y0, x1, y1 = batch.pop("boxes")[0]
-            batch = {k: tops.to_cuda(v) for k, v in batch.items()}
-            batch["img"] = F.normalize(batch["img"].float(), [0.5*255, 0.5*255, 0.5*255], [0.5*255, 0.5*255, 0.5*255])
-            batch["img"] = batch["img"].float()
-            batch["condition"] = batch["mask"] * batch["img"]
-            orig_shape = None
-            if G.imsize and batch["img"].shape[-1] != G.imsize[-1] and batch["img"].shape[-2] != G.imsize[-2]:
-                orig_shape = batch["img"].shape[-2:]
-                batch = resize_batch(**batch, imsize=G.imsize)
-            with torch.cuda.amp.autocast(amp):
-                if all_styles is not None:
-                    anonymized_im = G(**batch, s=iter(all_styles[idx]))["img"]
-                elif multi_modal_truncation and hasattr(G, "multi_modal_truncate") and hasattr(G.style_net, "w_centers"):
-                    w_indices = None
-                    if z_idx is not None:
-                        w_indices = [z_idx[idx] % len(G.style_net.w_centers)]
-                    anonymized_im = G.multi_modal_truncate(
-                        **batch, truncation_value=truncation_value,
-                        w_indices=w_indices)["img"]
-                else:
-                    z = None
-                    if z_idx is not None:
-                        state = np.random.RandomState(seed=z_idx[idx])
-                        z = state.normal(size=(1, G.z_channels))
-                        z = tops.to_cuda(torch.from_numpy(z))
-                    anonymized_im = G.sample(**batch, truncation_value=truncation_value, z=z)["img"]
-            if orig_shape is not None:
-                anonymized_im = F.resize(anonymized_im, orig_shape, antialias=True)
-            anonymized_im = (anonymized_im+1).div(2).clamp(0, 1).mul(255).round().byte()
-
-            # Resize and denormalize image
-            gim = F.resize(anonymized_im[0], (y1-y0, x1-x0), antialias=True)
-            mask = F.resize(batch["mask"][0], (y1-y0, x1-x0), interpolation=F.InterpolationMode.NEAREST).squeeze(0)
-            # Remove padding
-            pad = [max(-x0,0), max(-y0,0)]
-            pad = [*pad, max(x1-W,0), max(y1-H,0)]
-            remove_pad = lambda x: x[...,pad[1]:x.shape[-2]-pad[3], pad[0]:x.shape[-1]-pad[2]]
-            gim = remove_pad(gim)
-            mask = remove_pad(mask)
-            x0, y0 = max(x0, 0), max(y0, 0)
-            x1, y1 = min(x1, W), min(y1, H)
-            mask = mask.logical_not()[None].repeat(3, 1, 1)
-            im[:, y0:y1, x0:x1][mask] = gim[mask]
-
-        return im
-
-    def visualize_detection(self, im: torch.Tensor, cache_id: str = None) -> torch.Tensor:
-        all_detections = self.detector.forward_and_cache(im, cache_id, load_cache=self.load_cache)
-        for det in all_detections:
-            im = det.visualize(im)
-        return im
-
-    @torch.no_grad()
-    def forward(self, im: torch.Tensor, cache_id: str = None, track=True, **synthesis_kwargs) -> torch.Tensor:
-        assert im.dtype == torch.uint8
-        im = tops.to_cuda(im)
-        all_detections = self.detector.forward_and_cache(im, cache_id, load_cache=self.load_cache)
-        if hasattr(self, "tracker") and track:
-            [_.pre_process() for _ in all_detections]
-            import numpy as np
-            boxes = np.concatenate([_.boxes for _ in all_detections])
-            boxes = [Detection(box) for box in boxes]
-            self.tracker.step(boxes)
-            track_ids = self.tracker.detections_matched_ids
-            z_idx = []
-            for track_id in track_ids:
-                if track_id not in self.track_to_z_idx:
-                    self.track_to_z_idx[track_id] = self.cur_z_idx
-                    self.cur_z_idx += 1
-                z_idx.append(self.track_to_z_idx[track_id])
-            z_idx = np.array(z_idx)
-            idx_offset = 0
-
-        for detection in all_detections:
-            zs = None
-            if hasattr(self, "tracker") and track:
-                zs = z_idx[idx_offset:idx_offset+len(detection)]
-                idx_offset += len(detection)
-            im = self.anonymize_detections(im, detection, z_idx=zs, **synthesis_kwargs)
-
-        return im.cpu()
-
-    def __call__(self, *args, **kwargs):
-        return self.forward(*args, **kwargs)
-
dp2/data/__init__.py
DELETED
File without changes
dp2/data/build.py
DELETED
@@ -1,148 +0,0 @@
-import io
-import torch
-import tops
-from .utils import collate_fn, jpg_decoder, get_num_workers, png_decoder
-
-def get_dataloader(
-    dataset, gpu_transform: torch.nn.Module,
-    num_workers,
-    batch_size,
-    infinite: bool,
-    drop_last: bool,
-    prefetch_factor: int,
-    shuffle,
-    channels_last=False
-):
-    sampler = None
-    dl_kwargs = dict(
-        pin_memory=True,
-    )
-    if infinite:
-        sampler = tops.InfiniteSampler(
-            dataset, rank=tops.rank(),
-            num_replicas=tops.world_size(),
-            shuffle=shuffle
-        )
-    elif tops.world_size() > 1:
-        sampler = torch.utils.data.DistributedSampler(
-            dataset, shuffle=shuffle, num_replicas=tops.world_size(), rank=tops.rank())
-        dl_kwargs["drop_last"] = drop_last
-    else:
-        dl_kwargs["shuffle"] = shuffle
-        dl_kwargs["drop_last"] = drop_last
-    dataloader = torch.utils.data.DataLoader(
-        dataset, sampler=sampler, collate_fn=collate_fn,
-        batch_size=batch_size,
-        num_workers=num_workers, prefetch_factor=prefetch_factor,
-        **dl_kwargs
-    )
-    dataloader = tops.DataPrefetcher(dataloader, gpu_transform, channels_last=channels_last)
-    return dataloader
-
-
-def get_dataloader_places2_wds(
-    path,
-    batch_size: int,
-    num_workers: int,
-    transform: torch.nn.Module,
-    gpu_transform: torch.nn.Module,
-    infinite: bool,
-    shuffle: bool,
-    partial_batches: bool,
-    sample_shuffle=10_000,
-    tar_shuffle=100,
-    channels_last=False,
-):
-    import webdataset as wds
-    import os
-    os.environ["RANK"] = str(tops.rank())
-    os.environ["WORLD_SIZE"] = str(tops.world_size())
-
-    if infinite:
-        pipeline = [wds.ResampledShards(str(path))]
-    else:
-        pipeline = [wds.SimpleShardList(str(path))]
-    if shuffle:
-        pipeline.append(wds.shuffle(tar_shuffle))
-    pipeline.extend([
-        wds.split_by_node,
-        wds.split_by_worker,
-    ])
-    if shuffle:
-        pipeline.append(wds.shuffle(sample_shuffle))
-
-    pipeline.extend([
-        wds.tarfile_to_samples(),
-        wds.decode("torchrgb8"),
-        wds.rename_keys(["img", "jpg"], ["__key__", "__key__"]),
-    ])
-    if transform is not None:
-        pipeline.append(wds.map(transform))
-    pipeline.extend([
-        wds.batched(batch_size, collation_fn=collate_fn, partial=partial_batches),
-    ])
-    pipeline = wds.DataPipeline(*pipeline)
-    if infinite:
-        pipeline = pipeline.repeat(nepochs=1000000)
-    loader = wds.WebLoader(
-        pipeline, batch_size=None, shuffle=False,
-        num_workers=get_num_workers(num_workers),
-        persistent_workers=True,
-    )
-    loader = tops.DataPrefetcher(loader, gpu_transform, channels_last=channels_last, to_float=False)
-    return loader
-
-
-
-
-def get_dataloader_celebAHQ_wds(
-    path,
-    batch_size: int,
-    num_workers: int,
-    transform: torch.nn.Module,
-    gpu_transform: torch.nn.Module,
-    infinite: bool,
-    shuffle: bool,
-    partial_batches: bool,
-    sample_shuffle=10_000,
-    tar_shuffle=100,
-    channels_last=False,
-):
-    import webdataset as wds
-    import os
-    os.environ["RANK"] = str(tops.rank())
-    os.environ["WORLD_SIZE"] = str(tops.world_size())
-
-    if infinite:
-        pipeline = [wds.ResampledShards(str(path))]
-    else:
-        pipeline = [wds.SimpleShardList(str(path))]
-    if shuffle:
-        pipeline.append(wds.shuffle(tar_shuffle))
-    pipeline.extend([
-        wds.split_by_node,
-        wds.split_by_worker,
-    ])
-    if shuffle:
-        pipeline.append(wds.shuffle(sample_shuffle))
-
-    pipeline.extend([
-        wds.tarfile_to_samples(),
-        wds.decode(wds.handle_extension(".png", png_decoder)),
-        wds.rename_keys(["img", "png"], ["__key__", "__key__"]),
-    ])
-    if transform is not None:
-        pipeline.append(wds.map(transform))
-    pipeline.extend([
-        wds.batched(batch_size, collation_fn=collate_fn, partial=partial_batches),
-    ])
-    pipeline = wds.DataPipeline(*pipeline)
-    if infinite:
-        pipeline = pipeline.repeat(nepochs=1000000)
-    loader = wds.WebLoader(
-        pipeline, batch_size=None, shuffle=False,
-        num_workers=get_num_workers(num_workers),
-        persistent_workers=True,
-    )
-    loader = tops.DataPrefetcher(loader, gpu_transform, channels_last=channels_last)
-    return loader
dp2/data/datasets/__init__.py
DELETED
File without changes
dp2/data/datasets/coco_cse.py
DELETED
@@ -1,148 +0,0 @@
-import pickle
-import torchvision
-import torch
-import pathlib
-import numpy as np
-from typing import Callable, Optional, Union
-from torch.hub import get_dir as get_hub_dir
-
-
-def cache_embed_stats(embed_map: torch.Tensor):
-    mean = embed_map.mean(dim=0, keepdim=True)
-    rstd = ((embed_map - mean).square().mean(dim=0, keepdim=True)+1e-8).rsqrt()
-
-    cache = dict(mean=mean, rstd=rstd, embed_map=embed_map)
-    path = pathlib.Path(get_hub_dir(), f"embed_map_stats.torch")
-    path.parent.mkdir(exist_ok=True, parents=True)
-    torch.save(cache, path)
-
-
-class CocoCSE(torch.utils.data.Dataset):
-
-    def __init__(self,
-            dirpath: Union[str, pathlib.Path],
-            transform: Optional[Callable],
-            normalize_E: bool,):
-        dirpath = pathlib.Path(dirpath)
-        self.dirpath = dirpath
-
-        self.transform = transform
-        assert self.dirpath.is_dir(),\
-            f"Did not find dataset at: {dirpath}"
-        self.image_paths, self.embedding_paths = self._load_impaths()
-        self.embed_map = torch.from_numpy(np.load(self.dirpath.joinpath("embed_map.npy")))
-        mean = self.embed_map.mean(dim=0, keepdim=True)
-        rstd = ((self.embed_map - mean).square().mean(dim=0, keepdim=True)+1e-8).rsqrt()
-        self.embed_map = (self.embed_map - mean) * rstd
-        cache_embed_stats(self.embed_map)
-
-    def _load_impaths(self):
-        image_dir = self.dirpath.joinpath("images")
-        image_paths = list(image_dir.glob("*.png"))
-        image_paths.sort()
-        embedding_paths = [
-            self.dirpath.joinpath("embedding", x.stem + ".npy") for x in image_paths
-        ]
-        return image_paths, embedding_paths
-
-    def __len__(self):
-        return len(self.image_paths)
-
-    def __getitem__(self, idx):
-        im = torchvision.io.read_image(str(self.image_paths[idx]))
-        vertices, mask, border = np.split(np.load(self.embedding_paths[idx]), 3, axis=-1)
-        vertices = torch.from_numpy(vertices.squeeze()).long()
-        mask = torch.from_numpy(mask.squeeze()).float()
-        border = torch.from_numpy(border.squeeze()).float()
-        E_mask = 1 - mask - border
-        batch = {
-            "img": im,
-            "vertices": vertices[None],
-            "mask": mask[None],
-            "embed_map": self.embed_map,
-            "border": border[None],
-            "E_mask": E_mask[None]
-        }
-        if self.transform is None:
-            return batch
-        return self.transform(batch)
-
-
-class CocoCSEWithFace(CocoCSE):
-
-    def __init__(self,
-            dirpath: Union[str, pathlib.Path],
-            transform: Optional[Callable],
-            **kwargs):
-        super().__init__(dirpath, transform, **kwargs)
-        with open(self.dirpath.joinpath("face_boxes_XYXY.pickle"), "rb") as fp:
-            self.face_boxes = pickle.load(fp)
-
-    def __getitem__(self, idx):
-        item = super().__getitem__(idx)
-        item["boxes_XYXY"] = self.face_boxes[self.image_paths[idx].name]
-        return item
-
-
-class CocoCSESemantic(torch.utils.data.Dataset):
-
-    def __init__(self,
-            dirpath: Union[str, pathlib.Path],
-            transform: Optional[Callable],
-            **kwargs):
-        dirpath = pathlib.Path(dirpath)
-        self.dirpath = dirpath
-
-        self.transform = transform
-        assert self.dirpath.is_dir(),\
-            f"Did not find dataset at: {dirpath}"
-        self.image_paths, self.embedding_paths = self._load_impaths()
-        self.vertx2cat = torch.from_numpy(np.load(self.dirpath.parent.joinpath("vertx2cat.npy")))
-        self.embed_map = torch.from_numpy(np.load(self.dirpath.joinpath("embed_map.npy")))
-
-    def _load_impaths(self):
-        image_dir = self.dirpath.joinpath("images")
-        image_paths = list(image_dir.glob("*.png"))
-        image_paths.sort()
-        embedding_paths = [
-            self.dirpath.joinpath("embedding", x.stem + ".npy") for x in image_paths
-        ]
-        return image_paths, embedding_paths
-
-    def __len__(self):
-        return len(self.image_paths)
-
-    def __getitem__(self, idx):
-        im = torchvision.io.read_image(str(self.image_paths[idx]))
-        vertices, mask, border = np.split(np.load(self.embedding_paths[idx]), 3, axis=-1)
-        vertices = torch.from_numpy(vertices.squeeze()).long()
-        mask = torch.from_numpy(mask.squeeze()).float()
-        border = torch.from_numpy(border.squeeze()).float()
-        E_mask = 1 - mask - border
-        batch = {
-            "img": im,
-            "vertices": vertices[None],
-            "mask": mask[None],
-            "border": border[None],
-            "vertx2cat": self.vertx2cat,
-            "embed_map": self.embed_map,
-        }
-        if self.transform is None:
-            return batch
-        return self.transform(batch)
-
-
-class CocoCSESemanticWithFace(CocoCSESemantic):
-
-    def __init__(self,
-            dirpath: Union[str, pathlib.Path],
-            transform: Optional[Callable],
-            **kwargs):
-        super().__init__(dirpath, transform, **kwargs)
-        with open(self.dirpath.joinpath("face_boxes_XYXY.pickle"), "rb") as fp:
-            self.face_boxes = pickle.load(fp)
-
-    def __getitem__(self, idx):
-        item = super().__getitem__(idx)
-        item["boxes_XYXY"] = self.face_boxes[self.image_paths[idx].name]
-        return item
dp2/data/datasets/fdf.py
DELETED
@@ -1,129 +0,0 @@
import pathlib
from typing import Tuple
import numpy as np
import torch
import pathlib
try:
    import pyspng
    PYSPNG_IMPORTED = True
except ImportError:
    PYSPNG_IMPORTED = False
    print("Could not load pyspng. Defaulting to pillow image backend.")
    from PIL import Image
from tops import logger


class FDFDataset:

    def __init__(self,
                 dirpath,
                 imsize: Tuple[int],
                 load_keypoints: bool,
                 transform):
        dirpath = pathlib.Path(dirpath)
        self.dirpath = dirpath
        self.transform = transform
        self.imsize = imsize[0]
        self.load_keypoints = load_keypoints
        assert self.dirpath.is_dir(),\
            f"Did not find dataset at: {dirpath}"
        image_dir = self.dirpath.joinpath("images", str(self.imsize))
        self.image_paths = list(image_dir.glob("*.png"))
        assert len(self.image_paths) > 0,\
            f"Did not find images in: {image_dir}"
        self.image_paths.sort(key=lambda x: int(x.stem))
        self.landmarks = np.load(self.dirpath.joinpath("landmarks.npy")).reshape(-1, 7, 2).astype(np.float32)

        self.bounding_boxes = torch.load(self.dirpath.joinpath("bounding_box", f"{self.imsize}.torch"))
        assert len(self.image_paths) == len(self.bounding_boxes)
        assert len(self.image_paths) == len(self.landmarks)
        logger.log(
            f"Dataset loaded from: {dirpath}. Number of samples:{len(self)}, imsize={imsize}")

    def get_mask(self, idx):
        mask = torch.ones((1, self.imsize, self.imsize), dtype=torch.bool)
        bounding_box = self.bounding_boxes[idx]
        x0, y0, x1, y1 = bounding_box
        mask[:, y0:y1, x0:x1] = 0
        return mask

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        impath = self.image_paths[index]
        if PYSPNG_IMPORTED:
            with open(impath, "rb") as fp:
                im = pyspng.load(fp.read())
        else:
            with Image.open(impath) as fp:
                im = np.array(fp)
        im = torch.from_numpy(np.rollaxis(im, -1, 0))
        masks = self.get_mask(index)
        landmark = self.landmarks[index]
        batch = {
            "img": im,
            "mask": masks,
        }
        if self.load_keypoints:
            batch["keypoints"] = landmark
        if self.transform is None:
            return batch
        return self.transform(batch)


class FDF256Dataset:

    def __init__(self,
                 dirpath,
                 load_keypoints: bool,
                 transform):
        dirpath = pathlib.Path(dirpath)
        self.dirpath = dirpath
        self.transform = transform
        self.load_keypoints = load_keypoints
        assert self.dirpath.is_dir(),\
            f"Did not find dataset at: {dirpath}"
        image_dir = self.dirpath.joinpath("images")
        self.image_paths = list(image_dir.glob("*.png"))
        assert len(self.image_paths) > 0,\
            f"Did not find images in: {image_dir}"
        self.image_paths.sort(key=lambda x: int(x.stem))
        self.landmarks = np.load(self.dirpath.joinpath("landmarks.npy")).reshape(-1, 7, 2).astype(np.float32)
        self.bounding_boxes = torch.from_numpy(np.load(self.dirpath.joinpath("bounding_box.npy")))
        assert len(self.image_paths) == len(self.bounding_boxes)
        assert len(self.image_paths) == len(self.landmarks)
        logger.log(
            f"Dataset loaded from: {dirpath}. Number of samples:{len(self)}")

    def get_mask(self, idx):
        mask = torch.ones((1, 256, 256), dtype=torch.bool)
        bounding_box = self.bounding_boxes[idx]
        x0, y0, x1, y1 = bounding_box
        mask[:, y0:y1, x0:x1] = 0
        return mask

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        impath = self.image_paths[index]
        if PYSPNG_IMPORTED:
            with open(impath, "rb") as fp:
                im = pyspng.load(fp.read())
        else:
            with Image.open(impath) as fp:
                im = np.array(fp)
        im = torch.from_numpy(np.rollaxis(im, -1, 0))
        masks = self.get_mask(index)
        landmark = self.landmarks[index]
        batch = {
            "img": im,
            "mask": masks,
        }
        if self.load_keypoints:
            batch["keypoints"] = landmark
        if self.transform is None:
            return batch
        return self.transform(batch)

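A minimal usage sketch of the FDF256 dataset class deleted above, assuming the class is still importable (e.g. through the deep_privacy2 submodule this commit adds) and that an FDF256 dataset exists at the placeholder path below with images/, landmarks.npy and bounding_box.npy:

from dp2.data.datasets.fdf import FDF256Dataset

dataset = FDF256Dataset(dirpath="data/fdf256/val", load_keypoints=True, transform=None)  # placeholder path
sample = dataset[0]
print(sample["img"].shape)        # torch.Size([3, 256, 256]), uint8 image
print(sample["mask"].shape)       # torch.Size([1, 256, 256]), False inside the face box
print(sample["keypoints"].shape)  # (7, 2) landmark array
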
dp2/data/datasets/fdh.py
DELETED
@@ -1,104 +0,0 @@
import torch
import tops
import numpy as np
import io
import webdataset as wds
import os
from ..utils import png_decoder, mask_decoder, get_num_workers, collate_fn


def kp_decoder(x):
    # Keypoints are between [0, 1] for webdataset
    keypoints = torch.from_numpy(np.load(io.BytesIO(x))).float()
    keypoints[:, 0] /= 160
    keypoints[:, 1] /= 288
    check_outside = lambda x: (x < 0).logical_or(x > 1)
    is_outside = check_outside(keypoints[:, 0]).logical_or(
        check_outside(keypoints[:, 1])
    )
    keypoints[:, 2] = (keypoints[:, 2] > 0).logical_and(is_outside.logical_not())
    return keypoints


def vertices_decoder(x):
    vertices = torch.from_numpy(np.load(io.BytesIO(x)).astype(np.int32))
    return vertices.squeeze()[None]


def get_dataloader_fdh_wds(
        path,
        batch_size: int,
        num_workers: int,
        transform: torch.nn.Module,
        gpu_transform: torch.nn.Module,
        infinite: bool,
        shuffle: bool,
        partial_batches: bool,
        load_embedding: bool,
        sample_shuffle=10_000,
        tar_shuffle=100,
        read_condition=False,
        channels_last=False,
        ):
    # Need to set this for split_by_node to work.
    os.environ["RANK"] = str(tops.rank())
    os.environ["WORLD_SIZE"] = str(tops.world_size())
    if infinite:
        pipeline = [wds.ResampledShards(str(path))]
    else:
        pipeline = [wds.SimpleShardList(str(path))]
    if shuffle:
        pipeline.append(wds.shuffle(tar_shuffle))
    pipeline.extend([
        wds.split_by_node,
        wds.split_by_worker,
    ])
    if shuffle:
        pipeline.append(wds.shuffle(sample_shuffle))

    decoder = [
        wds.handle_extension("image.png", png_decoder),
        wds.handle_extension("mask.png", mask_decoder),
        wds.handle_extension("maskrcnn_mask.png", mask_decoder),
        wds.handle_extension("keypoints.npy", kp_decoder),
    ]

    rename_keys = [
        ["img", "image.png"], ["mask", "mask.png"],
        ["keypoints", "keypoints.npy"], ["maskrcnn_mask", "maskrcnn_mask.png"]
    ]
    if load_embedding:
        decoder.extend([
            wds.handle_extension("vertices.npy", vertices_decoder),
            wds.handle_extension("E_mask.png", mask_decoder)
        ])
        rename_keys.extend([
            ["vertices", "vertices.npy"],
            ["E_mask", "e_mask.png"]
        ])

    if read_condition:
        decoder.append(
            wds.handle_extension("condition.png", png_decoder)
        )
        rename_keys.append(["condition", "condition.png"])

    pipeline.extend([
        wds.tarfile_to_samples(),
        wds.decode(*decoder),
        wds.rename_keys(*rename_keys),
        wds.batched(batch_size, collation_fn=collate_fn, partial=partial_batches),
    ])
    if transform is not None:
        pipeline.append(wds.map(transform))
    pipeline = wds.DataPipeline(*pipeline)
    if infinite:
        pipeline = pipeline.repeat(nepochs=1000000)

    loader = wds.WebLoader(
        pipeline, batch_size=None, shuffle=False,
        num_workers=get_num_workers(num_workers),
        persistent_workers=True,
    )
    loader = tops.DataPrefetcher(loader, gpu_transform, channels_last=channels_last, to_float=False)
    return loader

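A hedged sketch of how get_dataloader_fdh_wds is typically invoked. The shard pattern, batch size and transform choices below are placeholders; the only things taken from the deleted code are the function signature and the fact that the returned loader is a tops.DataPrefetcher which applies gpu_transform on the GPU:

import torch
from dp2.data.datasets.fdh import get_dataloader_fdh_wds
from dp2.data.transforms.transforms import ToFloat, CreateCondition

loader = get_dataloader_fdh_wds(
    path="data/fdh/train-{000000..000099}.tar",   # placeholder shard pattern
    batch_size=8,
    num_workers=2,
    transform=None,                                # optional CPU-side transform
    gpu_transform=torch.nn.Sequential(             # handed to tops.DataPrefetcher
        ToFloat(keys=["img"]), CreateCondition()),
    infinite=True,
    shuffle=True,
    partial_batches=False,
    load_embedding=False,
)
batch = next(iter(loader))                         # dict with "img", "mask", "keypoints", ...
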
dp2/data/transforms/__init__.py
DELETED
@@ -1,2 +0,0 @@
from .transforms import RandomCrop, CreateCondition, CreateEmbedding, Resize, ToFloat, Normalize
from .stylegan2_transform import StyleGANAugmentPipe

dp2/data/transforms/functional.py
DELETED
@@ -1,61 +0,0 @@
import torchvision.transforms.functional as F
import torch
import pickle
from tops import download_file, assert_shape
from typing import Dict
from functools import lru_cache

global symmetry_transform

@lru_cache(maxsize=1)
def get_symmetry_transform(symmetry_url):
    file_name = download_file(symmetry_url)
    with open(file_name, "rb") as fp:
        symmetry = pickle.load(fp)
    return torch.from_numpy(symmetry["vertex_transforms"]).long()


hflip_handled_cases = set([
    "keypoints", "img", "mask", "border", "semantic_mask", "vertices", "E_mask", "embed_map", "condition",
    "embedding", "vertx2cat", "maskrcnn_mask", "__key__",
    "img_hr", "condition_hr", "mask_hr"])

def hflip(container: Dict[str, torch.Tensor], flip_map=None) -> Dict[str, torch.Tensor]:
    container["img"] = F.hflip(container["img"])
    if "condition" in container:
        container["condition"] = F.hflip(container["condition"])
    if "embedding" in container:
        container["embedding"] = F.hflip(container["embedding"])
    assert all([key in hflip_handled_cases for key in container]), container.keys()
    if "keypoints" in container:
        assert flip_map is not None
        if container["keypoints"].ndim == 3:
            keypoints = container["keypoints"][:, flip_map, :]
            keypoints[:, :, 0] = 1 - keypoints[:, :, 0]
        else:
            assert_shape(container["keypoints"], (None, 3))
            keypoints = container["keypoints"][flip_map, :]
            keypoints[:, 0] = 1 - keypoints[:, 0]
        container["keypoints"] = keypoints
    if "mask" in container:
        container["mask"] = F.hflip(container["mask"])
    if "border" in container:
        container["border"] = F.hflip(container["border"])
    if "semantic_mask" in container:
        container["semantic_mask"] = F.hflip(container["semantic_mask"])
    if "vertices" in container:
        symmetry_transform = get_symmetry_transform("https://dl.fbaipublicfiles.com/densepose/meshes/symmetry/symmetry_smpl_27554.pkl")
        container["vertices"] = F.hflip(container["vertices"])
        symmetry_transform_ = symmetry_transform.to(container["vertices"].device)
        container["vertices"] = symmetry_transform_[container["vertices"].long()]
    if "E_mask" in container:
        container["E_mask"] = F.hflip(container["E_mask"])
    if "maskrcnn_mask" in container:
        container["maskrcnn_mask"] = F.hflip(container["maskrcnn_mask"])
    if "img_hr" in container:
        container["img_hr"] = F.hflip(container["img_hr"])
    if "condition_hr" in container:
        container["condition_hr"] = F.hflip(container["condition_hr"])
    if "mask_hr" in container:
        container["mask_hr"] = F.hflip(container["mask_hr"])
    return container

dp2/data/transforms/stylegan2_transform.py
DELETED
@@ -1,394 +0,0 @@
import numpy as np
import scipy.signal
import torch
try:
    from sg3_torch_utils import misc
    from sg3_torch_utils.ops import upfirdn2d
    from sg3_torch_utils.ops import grid_sample_gradfix
    from sg3_torch_utils.ops import conv2d_gradfix
except:
    pass
#----------------------------------------------------------------------------
# Coefficients of various wavelet decomposition low-pass filters.

wavelets = {
    'haar': [0.7071067811865476, 0.7071067811865476],
    'db1': [0.7071067811865476, 0.7071067811865476],
    'db2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025],
    'db3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569],
    'db4': [-0.010597401784997278, 0.032883011666982945, 0.030841381835986965, -0.18703481171888114, -0.02798376941698385, 0.6308807679295904, 0.7148465705525415, 0.23037781330885523],
    'db5': [0.003335725285001549, -0.012580751999015526, -0.006241490213011705, 0.07757149384006515, -0.03224486958502952, -0.24229488706619015, 0.13842814590110342, 0.7243085284385744, 0.6038292697974729, 0.160102397974125],
    'db6': [-0.00107730108499558, 0.004777257511010651, 0.0005538422009938016, -0.031582039318031156, 0.02752286553001629, 0.09750160558707936, -0.12976686756709563, -0.22626469396516913, 0.3152503517092432, 0.7511339080215775, 0.4946238903983854, 0.11154074335008017],
    'db7': [0.0003537138000010399, -0.0018016407039998328, 0.00042957797300470274, 0.012550998556013784, -0.01657454163101562, -0.03802993693503463, 0.0806126091510659, 0.07130921926705004, -0.22403618499416572, -0.14390600392910627, 0.4697822874053586, 0.7291320908465551, 0.39653931948230575, 0.07785205408506236],
    'db8': [-0.00011747678400228192, 0.0006754494059985568, -0.0003917403729959771, -0.00487035299301066, 0.008746094047015655, 0.013981027917015516, -0.04408825393106472, -0.01736930100202211, 0.128747426620186, 0.00047248457399797254, -0.2840155429624281, -0.015829105256023893, 0.5853546836548691, 0.6756307362980128, 0.3128715909144659, 0.05441584224308161],
    'sym2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025],
    'sym3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569],
    'sym4': [-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427],
    'sym5': [0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728],
    'sym6': [0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148],
    'sym7': [0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255],
    'sym8': [-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609],
}

#----------------------------------------------------------------------------
# Helpers for constructing transformation matrices.


def matrix(*rows, device=None):
    assert all(len(row) == len(rows[0]) for row in rows)
    elems = [x for row in rows for x in row]
    ref = [x for x in elems if isinstance(x, torch.Tensor)]
    if len(ref) == 0:
        return misc.constant(np.asarray(rows), device=device)
    assert device is None or device == ref[0].device
    elems = [x if isinstance(x, torch.Tensor) else misc.constant(x, shape=ref[0].shape, device=ref[0].device) for x in elems]
    return torch.stack(elems, dim=-1).reshape(ref[0].shape + (len(rows), -1))


def translate2d(tx, ty, **kwargs):
    return matrix(
        [1, 0, tx],
        [0, 1, ty],
        [0, 0, 1],
        **kwargs)


def translate3d(tx, ty, tz, **kwargs):
    return matrix(
        [1, 0, 0, tx],
        [0, 1, 0, ty],
        [0, 0, 1, tz],
        [0, 0, 0, 1],
        **kwargs)


def scale2d(sx, sy, **kwargs):
    return matrix(
        [sx, 0, 0],
        [0, sy, 0],
        [0, 0, 1],
        **kwargs)


def scale3d(sx, sy, sz, **kwargs):
    return matrix(
        [sx, 0, 0, 0],
        [0, sy, 0, 0],
        [0, 0, sz, 0],
        [0, 0, 0, 1],
        **kwargs)


def rotate2d(theta, **kwargs):
    return matrix(
        [torch.cos(theta), torch.sin(-theta), 0],
        [torch.sin(theta), torch.cos(theta), 0],
        [0, 0, 1],
        **kwargs)


def rotate3d(v, theta, **kwargs):
    vx = v[..., 0]; vy = v[..., 1]; vz = v[..., 2]
    s = torch.sin(theta); c = torch.cos(theta); cc = 1 - c
    return matrix(
        [vx*vx*cc+c, vx*vy*cc-vz*s, vx*vz*cc+vy*s, 0],
        [vy*vx*cc+vz*s, vy*vy*cc+c, vy*vz*cc-vx*s, 0],
        [vz*vx*cc-vy*s, vz*vy*cc+vx*s, vz*vz*cc+c, 0],
        [0, 0, 0, 1],
        **kwargs)


def translate2d_inv(tx, ty, **kwargs):
    return translate2d(-tx, -ty, **kwargs)


def scale2d_inv(sx, sy, **kwargs):
    return scale2d(1 / sx, 1 / sy, **kwargs)


def rotate2d_inv(theta, **kwargs):
    return rotate2d(-theta, **kwargs)


class StyleGANAugmentPipe(torch.nn.Module):
    def __init__(self,
                 rotate90=0, xint=0, xint_max=0.125,
                 scale=0, rotate=0, aniso=0, xfrac=0, scale_std=0.2, rotate_max=1, aniso_std=0.2, xfrac_std=0.125,
                 brightness=0, contrast=0, lumaflip=0, hue=0, saturation=0, brightness_std=0.2, contrast_std=0.5,
                 hue_max=1, saturation_std=1,
                 imgfilter=0, imgfilter_bands=[1,1,1,1], imgfilter_std=1,
                 ):
        super().__init__()
        self.register_buffer('p', torch.ones([]))  # Overall multiplier for augmentation probability.

        # Pixel blitting.
        self.rotate90 = float(rotate90)  # Probability multiplier for 90 degree rotations.
        self.xint = float(xint)  # Probability multiplier for integer translation.
        self.xint_max = float(xint_max)  # Range of integer translation, relative to image dimensions.

        # General geometric transformations.
        self.scale = float(scale)  # Probability multiplier for isotropic scaling.
        self.rotate = float(rotate)  # Probability multiplier for arbitrary rotation.
        self.aniso = float(aniso)  # Probability multiplier for anisotropic scaling.
        self.xfrac = float(xfrac)  # Probability multiplier for fractional translation.
        self.scale_std = float(scale_std)  # Log2 standard deviation of isotropic scaling.
        self.rotate_max = float(rotate_max)  # Range of arbitrary rotation, 1 = full circle.
        self.aniso_std = float(aniso_std)  # Log2 standard deviation of anisotropic scaling.
        self.xfrac_std = float(xfrac_std)  # Standard deviation of frational translation, relative to image dimensions.

        # Color transformations.
        self.brightness = float(brightness)  # Probability multiplier for brightness.
        self.contrast = float(contrast)  # Probability multiplier for contrast.
        self.lumaflip = float(lumaflip)  # Probability multiplier for luma flip.
        self.hue = float(hue)  # Probability multiplier for hue rotation.
        self.saturation = float(saturation)  # Probability multiplier for saturation.
        self.brightness_std = float(brightness_std)  # Standard deviation of brightness.
        self.contrast_std = float(contrast_std)  # Log2 standard deviation of contrast.
        self.hue_max = float(hue_max)  # Range of hue rotation, 1 = full circle.
        self.saturation_std = float(saturation_std)  # Log2 standard deviation of saturation.

        # Image-space filtering.
        self.imgfilter = float(imgfilter)  # Probability multiplier for image-space filtering.
        self.imgfilter_bands = list(imgfilter_bands)  # Probability multipliers for individual frequency bands.
        self.imgfilter_std = float(imgfilter_std)  # Log2 standard deviation of image-space filter amplification.

        # Setup orthogonal lowpass filter for geometric augmentations.
        self.register_buffer('Hz_geom', upfirdn2d.setup_filter(wavelets['sym6']))

        # Construct filter bank for image-space filtering.
        Hz_lo = np.asarray(wavelets['sym2'])  # H(z)
        Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size))  # H(-z)
        Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2  # H(z) * H(z^-1) / 2
        Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2  # H(-z) * H(-z^-1) / 2
        Hz_fbank = np.eye(4, 1)  # Bandpass(H(z), b_i)
        for i in range(1, Hz_fbank.shape[0]):
            Hz_fbank = np.dstack([Hz_fbank, np.zeros_like(Hz_fbank)]).reshape(Hz_fbank.shape[0], -1)[:, :-1]
            Hz_fbank = scipy.signal.convolve(Hz_fbank, [Hz_lo2])
            Hz_fbank[i, (Hz_fbank.shape[1] - Hz_hi2.size) // 2 : (Hz_fbank.shape[1] + Hz_hi2.size) // 2] += Hz_hi2
        self.register_buffer('Hz_fbank', torch.as_tensor(Hz_fbank, dtype=torch.float32))

    def forward(self, batch, debug_percentile=None):
        images = batch["img"]
        batch["vertices"] = batch["vertices"].float()
        assert isinstance(images, torch.Tensor) and images.ndim == 4
        batch_size, num_channels, height, width = images.shape
        device = images.device
        self.Hz_fbank = self.Hz_fbank.to(device)
        self.Hz_geom = self.Hz_geom.to(device)
        if debug_percentile is not None:
            debug_percentile = torch.as_tensor(debug_percentile, dtype=torch.float32, device=device)

        # -------------------------------------
        # Select parameters for pixel blitting.
        # -------------------------------------

        # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in
        I_3 = torch.eye(3, device=device)
        G_inv = I_3

        # Apply integer translation with probability (xint * strength).
        if self.xint > 0:
            t = (torch.rand([batch_size, 2], device=device) * 2 - 1) * self.xint_max
            t = torch.where(torch.rand([batch_size, 1], device=device) < self.xint * self.p, t, torch.zeros_like(t))
            if debug_percentile is not None:
                t = torch.full_like(t, (debug_percentile * 2 - 1) * self.xint_max)
            G_inv = G_inv @ translate2d_inv(torch.round(t[:,0] * width), torch.round(t[:,1] * height))

        # --------------------------------------------------------
        # Select parameters for general geometric transformations.
        # --------------------------------------------------------

        # Apply isotropic scaling with probability (scale * strength).
        if self.scale > 0:
            s = torch.exp2(torch.randn([batch_size], device=device) * self.scale_std)
            s = torch.where(torch.rand([batch_size], device=device) < self.scale * self.p, s, torch.ones_like(s))
            if debug_percentile is not None:
                s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.scale_std))
            G_inv = G_inv @ scale2d_inv(s, s)

        # Apply pre-rotation with probability p_rot.
        p_rot = 1 - torch.sqrt((1 - self.rotate * self.p).clamp(0, 1))  # P(pre OR post) = p
        if self.rotate > 0:
            theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max
            theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta))
            if debug_percentile is not None:
                theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.rotate_max)
            G_inv = G_inv @ rotate2d_inv(-theta)  # Before anisotropic scaling.

        # Apply anisotropic scaling with probability (aniso * strength).
        if self.aniso > 0:
            s = torch.exp2(torch.randn([batch_size], device=device) * self.aniso_std)
            s = torch.where(torch.rand([batch_size], device=device) < self.aniso * self.p, s, torch.ones_like(s))
            if debug_percentile is not None:
                s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.aniso_std))
            G_inv = G_inv @ scale2d_inv(s, 1 / s)

        # Apply post-rotation with probability p_rot.
        if self.rotate > 0:
            theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max
            theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta))
            if debug_percentile is not None:
                theta = torch.zeros_like(theta)
            G_inv = G_inv @ rotate2d_inv(-theta)  # After anisotropic scaling.

        # Apply fractional translation with probability (xfrac * strength).
        if self.xfrac > 0:
            t = torch.randn([batch_size, 2], device=device) * self.xfrac_std
            t = torch.where(torch.rand([batch_size, 1], device=device) < self.xfrac * self.p, t, torch.zeros_like(t))
            if debug_percentile is not None:
                t = torch.full_like(t, torch.erfinv(debug_percentile * 2 - 1) * self.xfrac_std)
            G_inv = G_inv @ translate2d_inv(t[:,0] * width, t[:,1] * height)

        # ----------------------------------
        # Execute geometric transformations.
        # ----------------------------------

        # Execute if the transform is not identity.
        if G_inv is not I_3:
            # Calculate padding.
            cx = (width - 1) / 2
            cy = (height - 1) / 2
            cp = matrix([-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1], device=device)  # [idx, xyz]
            cp = G_inv @ cp.t()  # [batch, xyz, idx]
            Hz_pad = self.Hz_geom.shape[0] // 4
            margin = cp[:, :2, :].permute(1, 0, 2).flatten(1)  # [xy, batch * idx]
            margin = torch.cat([-margin, margin]).max(dim=1).values  # [x0, y0, x1, y1]
            margin = margin + misc.constant([Hz_pad * 2 - cx, Hz_pad * 2 - cy] * 2, device=device)
            margin = margin.max(misc.constant([0, 0] * 2, device=device))
            margin = margin.min(misc.constant([width-1, height-1] * 2, device=device))
            mx0, my0, mx1, my1 = margin.ceil().to(torch.int32)

            # Pad image and adjust origin.
            images = torch.nn.functional.pad(input=images, pad=[mx0,mx1,my0,my1], mode='reflect')
            batch["mask"] = torch.nn.functional.pad(input=batch["mask"], pad=[mx0,mx1,my0,my1], mode='constant', value=1.0)
            batch["E_mask"] = torch.nn.functional.pad(input=batch["E_mask"], pad=[mx0,mx1,my0,my1], mode='constant', value=0.0)
            batch["vertices"] = torch.nn.functional.pad(input=batch["vertices"], pad=[mx0,mx1,my0,my1], mode='constant', value=0.0)
            G_inv = translate2d((mx0 - mx1) / 2, (my0 - my1) / 2) @ G_inv

            # Upsample.
            images = upfirdn2d.upsample2d(x=images, f=self.Hz_geom, up=2)
            batch["mask"] = torch.nn.functional.interpolate(batch["mask"], scale_factor=2, mode="nearest")
            batch["E_mask"] = torch.nn.functional.interpolate(batch["E_mask"], scale_factor=2, mode="nearest")
            batch["vertices"] = torch.nn.functional.interpolate(batch["vertices"], scale_factor=2, mode="nearest")
            G_inv = scale2d(2, 2, device=device) @ G_inv @ scale2d_inv(2, 2, device=device)
            G_inv = translate2d(-0.5, -0.5, device=device) @ G_inv @ translate2d_inv(-0.5, -0.5, device=device)

            # Execute transformation.
            shape = [batch_size, num_channels, (height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2]
            G_inv = scale2d(2 / images.shape[3], 2 / images.shape[2], device=device) @ G_inv @ scale2d_inv(2 / shape[3], 2 / shape[2], device=device)
            grid = torch.nn.functional.affine_grid(theta=G_inv[:,:2,:], size=shape, align_corners=False)
            images = grid_sample_gradfix.grid_sample(images, grid)

            batch["mask"] = torch.nn.functional.grid_sample(
                input=batch["mask"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)
            batch["E_mask"] = torch.nn.functional.grid_sample(
                input=batch["E_mask"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)
            batch["vertices"] = torch.nn.functional.grid_sample(
                input=batch["vertices"], grid=grid, mode='nearest', padding_mode="border", align_corners=False)

            # Downsample and crop.
            images = upfirdn2d.downsample2d(x=images, f=self.Hz_geom, down=2, padding=-Hz_pad*2, flip_filter=True)
            batch["mask"] = torch.nn.functional.interpolate(batch["mask"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
            batch["E_mask"] = torch.nn.functional.interpolate(batch["E_mask"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
            batch["vertices"] = torch.nn.functional.interpolate(batch["vertices"][:, :, Hz_pad*2:-Hz_pad*2, Hz_pad*2:-Hz_pad*2], scale_factor=.5, mode="nearest", recompute_scale_factor=False)
        # --------------------------------------------
        # Select parameters for color transformations.
        # --------------------------------------------

        # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out
        I_4 = torch.eye(4, device=device)
        C = I_4

        # Apply brightness with probability (brightness * strength).
        if self.brightness > 0:
            b = torch.randn([batch_size], device=device) * self.brightness_std
            b = torch.where(torch.rand([batch_size], device=device) < self.brightness * self.p, b, torch.zeros_like(b))
            if debug_percentile is not None:
                b = torch.full_like(b, torch.erfinv(debug_percentile * 2 - 1) * self.brightness_std)
            C = translate3d(b, b, b) @ C

        # Apply contrast with probability (contrast * strength).
        if self.contrast > 0:
            c = torch.exp2(torch.randn([batch_size], device=device) * self.contrast_std)
            c = torch.where(torch.rand([batch_size], device=device) < self.contrast * self.p, c, torch.ones_like(c))
            if debug_percentile is not None:
                c = torch.full_like(c, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.contrast_std))
            C = scale3d(c, c, c) @ C

        # Apply luma flip with probability (lumaflip * strength).
        v = misc.constant(np.asarray([1, 1, 1, 0]) / np.sqrt(3), device=device)  # Luma axis.

        # Apply hue rotation with probability (hue * strength).
        if self.hue > 0 and num_channels > 1:
            theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.hue_max
            theta = torch.where(torch.rand([batch_size], device=device) < self.hue * self.p, theta, torch.zeros_like(theta))
            if debug_percentile is not None:
                theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.hue_max)
            C = rotate3d(v, theta) @ C  # Rotate around v.

        # Apply saturation with probability (saturation * strength).
        if self.saturation > 0 and num_channels > 1:
            s = torch.exp2(torch.randn([batch_size, 1, 1], device=device) * self.saturation_std)
            s = torch.where(torch.rand([batch_size, 1, 1], device=device) < self.saturation * self.p, s, torch.ones_like(s))
            if debug_percentile is not None:
                s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.saturation_std))
            C = (v.ger(v) + (I_4 - v.ger(v)) * s) @ C

        # ------------------------------
        # Execute color transformations.
        # ------------------------------

        # Execute if the transform is not identity.
        if C is not I_4:
            images = images.reshape([batch_size, num_channels, height * width])
            if num_channels == 3:
                images = C[:, :3, :3] @ images + C[:, :3, 3:]
            elif num_channels == 1:
                C = C[:, :3, :].mean(dim=1, keepdims=True)
                images = images * C[:, :, :3].sum(dim=2, keepdims=True) + C[:, :, 3:]
            else:
                raise ValueError('Image must be RGB (3 channels) or L (1 channel)')
            images = images.reshape([batch_size, num_channels, height, width])

        # ----------------------
        # Image-space filtering.
        # ----------------------

        if self.imgfilter > 0:
            num_bands = self.Hz_fbank.shape[0]
            assert len(self.imgfilter_bands) == num_bands
            expected_power = misc.constant(np.array([10, 1, 1, 1]) / 13, device=device)  # Expected power spectrum (1/f).

            # Apply amplification for each band with probability (imgfilter * strength * band_strength).
            g = torch.ones([batch_size, num_bands], device=device)  # Global gain vector (identity).
            for i, band_strength in enumerate(self.imgfilter_bands):
                t_i = torch.exp2(torch.randn([batch_size], device=device) * self.imgfilter_std)
                t_i = torch.where(torch.rand([batch_size], device=device) < self.imgfilter * self.p * band_strength, t_i, torch.ones_like(t_i))
                if debug_percentile is not None:
                    t_i = torch.full_like(t_i, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.imgfilter_std)) if band_strength > 0 else torch.ones_like(t_i)
                t = torch.ones([batch_size, num_bands], device=device)  # Temporary gain vector.
                t[:, i] = t_i  # Replace i'th element.
                t = t / (expected_power * t.square()).sum(dim=-1, keepdims=True).sqrt()  # Normalize power.
                g = g * t  # Accumulate into global gain.

            # Construct combined amplification filter.
            Hz_prime = g @ self.Hz_fbank  # [batch, tap]
            Hz_prime = Hz_prime.unsqueeze(1).repeat([1, num_channels, 1])  # [batch, channels, tap]
            Hz_prime = Hz_prime.reshape([batch_size * num_channels, 1, -1])  # [batch * channels, 1, tap]

            # Apply filter.
            p = self.Hz_fbank.shape[1] // 2
            images = images.reshape([1, batch_size * num_channels, height, width])
            images = torch.nn.functional.pad(input=images, pad=[p,p,p,p], mode='reflect')
            images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(2), groups=batch_size*num_channels)
            images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(3), groups=batch_size*num_channels)
            images = images.reshape([batch_size, num_channels, height, width])

        # ------------------------
        # Image-space corruptions.
        # ------------------------
        batch["img"] = images
        batch["vertices"] = batch["vertices"].long()
        batch["border"] = 1 - batch["E_mask"] - batch["mask"]
        return batch

dp2/data/transforms/transforms.py
DELETED
@@ -1,247 +0,0 @@
from pathlib import Path
from typing import Dict, List
import torchvision
import torch
import tops
import torchvision.transforms.functional as F
from .functional import hflip


class RandomHorizontalFlip(torch.nn.Module):

    def __init__(self, p: float, flip_map=None, **kwargs):
        super().__init__()
        self.flip_ratio = p
        self.flip_map = flip_map
        if self.flip_ratio is None:
            self.flip_ratio = 0.5
        assert 0 <= self.flip_ratio <= 1

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        if torch.rand(1) > self.flip_ratio:
            return container
        return hflip(container, self.flip_map)


class CenterCrop(torch.nn.Module):
    """
    Performs the transform on the image.
    NOTE: Does not transform the mask to improve runtime.
    """

    def __init__(self, size: List[int]):
        super().__init__()
        self.size = tuple(size)

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        min_size = min(container["img"].shape[1], container["img"].shape[2])
        if min_size < self.size[0]:
            container["img"] = F.center_crop(container["img"], min_size)
            container["img"] = F.resize(container["img"], self.size)
            return container
        container["img"] = F.center_crop(container["img"], self.size)
        return container


class Resize(torch.nn.Module):
    """
    Performs the transform on the image.
    NOTE: Does not transform the mask to improve runtime.
    """

    def __init__(self, size, interpolation=F.InterpolationMode.BILINEAR):
        super().__init__()
        self.size = tuple(size)
        self.interpolation = interpolation

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        container["img"] = F.resize(container["img"], self.size, self.interpolation, antialias=True)
        if "semantic_mask" in container:
            container["semantic_mask"] = F.resize(
                container["semantic_mask"], self.size, F.InterpolationMode.NEAREST)
        if "embedding" in container:
            container["embedding"] = F.resize(
                container["embedding"], self.size, self.interpolation)
        if "mask" in container:
            container["mask"] = F.resize(
                container["mask"], self.size, F.InterpolationMode.NEAREST)
        if "E_mask" in container:
            container["E_mask"] = F.resize(
                container["E_mask"], self.size, F.InterpolationMode.NEAREST)
        if "maskrcnn_mask" in container:
            container["maskrcnn_mask"] = F.resize(
                container["maskrcnn_mask"], self.size, F.InterpolationMode.NEAREST)
        if "vertices" in container:
            container["vertices"] = F.resize(
                container["vertices"], self.size, F.InterpolationMode.NEAREST)
        return container

    def __repr__(self):
        repr = super().__repr__()
        vars_ = dict(size=self.size, interpolation=self.interpolation)
        return repr + " " + " ".join([f"{k}: {v}" for k, v in vars_.items()])


class InsertHRImage(torch.nn.Module):
    """
    Resizes mask by maxpool and assumes condition is already created
    """
    def __init__(self, size, interpolation=F.InterpolationMode.BILINEAR):
        super().__init__()
        self.size = tuple(size)
        self.interpolation = interpolation

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        assert container["img"].dtype == torch.float32
        container["img_hr"] = F.resize(container["img"], self.size, self.interpolation, antialias=True)
        container["condition_hr"] = F.resize(container["condition"], self.size, self.interpolation, antialias=True)
        mask = container["mask"] > 0
        container["mask_hr"] = (torch.nn.functional.adaptive_max_pool2d(mask.logical_not().float(), output_size=self.size) > 0).logical_not().float()
        container["condition_hr"] = container["condition_hr"] * (1 - container["mask_hr"]) + container["img_hr"] * container["mask_hr"]
        return container

    def __repr__(self):
        repr = super().__repr__()
        vars_ = dict(size=self.size, interpolation=self.interpolation)
        return repr + " "


class CopyHRImage(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        container["img_hr"] = container["img"]
        container["condition_hr"] = container["condition"]
        container["mask_hr"] = container["mask"]
        return container


class Resize2(torch.nn.Module):
    """
    Resizes mask by maxpool and assumes condition is already created
    """
    def __init__(self, size, interpolation=F.InterpolationMode.BILINEAR, downsample_condition: bool = True, mask_condition=True):
        super().__init__()
        self.size = tuple(size)
        self.interpolation = interpolation
        self.downsample_condition = downsample_condition
        self.mask_condition = mask_condition

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        # assert container["img"].dtype == torch.float32
        container["img"] = F.resize(container["img"], self.size, self.interpolation, antialias=True)
        mask = container["mask"] > 0
        container["mask"] = (torch.nn.functional.adaptive_max_pool2d(mask.logical_not().float(), output_size=self.size) > 0).logical_not().float()

        if self.downsample_condition:
            container["condition"] = F.resize(container["condition"], self.size, self.interpolation, antialias=True)
            if self.mask_condition:
                container["condition"] = container["condition"] * (1 - container["mask"]) + container["img"] * container["mask"]
        return container

    def __repr__(self):
        repr = super().__repr__()
        vars_ = dict(size=self.size, interpolation=self.interpolation)
        return repr + " " + " ".join([f"{k}: {v}" for k, v in vars_.items()])


class Normalize(torch.nn.Module):
    """
    Performs the transform on the image.
    NOTE: Does not transform the mask to improve runtime.
    """

    def __init__(self, mean, std, inplace, keys=["img"]):
        super().__init__()
        self.mean = mean
        self.std = std
        self.inplace = inplace
        self.keys = keys

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        for key in self.keys:
            container[key] = F.normalize(container[key], self.mean, self.std, self.inplace)
        return container

    def __repr__(self):
        repr = super().__repr__()
        vars_ = dict(mean=self.mean, std=self.std, inplace=self.inplace)
        return repr + " " + " ".join([f"{k}: {v}" for k, v in vars_.items()])


class ToFloat(torch.nn.Module):

    def __init__(self, keys=["img"], norm=True) -> None:
        super().__init__()
        self.keys = keys
        self.gain = 255 if norm else 1

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        for key in self.keys:
            container[key] = container[key].float() / self.gain
        return container


class RandomCrop(torchvision.transforms.RandomCrop):
    """
    Performs the transform on the image.
    NOTE: Does not transform the mask to improve runtime.
    """

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        container["img"] = super().forward(container["img"])
        return container


class CreateCondition(torch.nn.Module):

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        if container["img"].dtype == torch.uint8:
            container["condition"] = container["img"] * container["mask"].byte() + (1-container["mask"].byte()) * 127
            return container
        container["condition"] = container["img"] * container["mask"]
        return container


class CreateEmbedding(torch.nn.Module):

    def __init__(self, embed_path: Path, cuda=True) -> None:
        super().__init__()
        self.embed_map = torch.load(embed_path, map_location=torch.device("cpu"))
        if cuda:
            self.embed_map = tops.to_cuda(self.embed_map)

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        vertices = container["vertices"]
        if vertices.ndim == 3:
            embedding = self.embed_map[vertices.long()].squeeze(dim=0)
            embedding = embedding.permute(2, 0, 1) * container["E_mask"]
            pass
        else:
            assert vertices.ndim == 4
            embedding = self.embed_map[vertices.long()].squeeze(dim=1)
            embedding = embedding.permute(0, 3, 1, 2) * container["E_mask"]
        container["embedding"] = embedding
        container["embed_map"] = self.embed_map.clone()
        return container


class UpdateMask(torch.nn.Module):

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        container["mask"] = (container["img"] == container["condition"]).any(dim=1, keepdims=True).float()
        return container


class LoadClassEmbedding(torch.nn.Module):

    def __init__(self, embedding_path: Path) -> None:
        super().__init__()
        self.embedding = torch.load(embedding_path, map_location="cpu")

    def forward(self, container: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        key = "_".join(container["__key__"].split("train/")[-1].split("/")[:-1])
        container["class_embedding"] = self.embedding[key].view(-1)
        return container

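Because every transform above maps a dict of tensors to a dict of tensors, they compose directly with torch.nn.Sequential. A small self-contained sketch on random data, assuming the module above is importable as dp2.data.transforms.transforms (e.g. via the deep_privacy2 submodule):

import torch
from dp2.data.transforms.transforms import CreateCondition, ToFloat, Normalize

batch = {
    "img": torch.randint(0, 256, (1, 3, 288, 160), dtype=torch.uint8),
    "mask": torch.randint(0, 2, (1, 1, 288, 160)).float(),
}
pipeline = torch.nn.Sequential(
    CreateCondition(),                    # keep img where mask == 1, grey (127) where mask == 0
    ToFloat(keys=["img", "condition"]),   # uint8 [0, 255] -> float [0, 1]
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=False, keys=["img", "condition"]),
)
out = pipeline(batch)
print(out["condition"].shape, out["condition"].dtype)  # torch.Size([1, 3, 288, 160]) torch.float32
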
dp2/data/utils.py
DELETED
@@ -1,102 +0,0 @@
import torch
from PIL import Image
import numpy as np
import multiprocessing
import io
from tops import logger
from torch.utils.data._utils.collate import default_collate

try:
    import pyspng

    PYSPNG_IMPORTED = True
except ImportError:
    PYSPNG_IMPORTED = False
    print("Could not load pyspng. Defaulting to pillow image backend.")
    from PIL import Image


def get_coco_keypoints():
    return [
        "nose",
        "left_eye",
        "right_eye",
        "left_ear",
        "right_ear",
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
        "left_hip",
        "right_hip",
        "left_knee",
        "right_knee",
        "left_ankle",
        "right_ankle",
    ]


def get_coco_flipmap():
    keypoints = get_coco_keypoints()
    keypoint_flip_map = {
        "left_eye": "right_eye",
        "left_ear": "right_ear",
        "left_shoulder": "right_shoulder",
        "left_elbow": "right_elbow",
        "left_wrist": "right_wrist",
        "left_hip": "right_hip",
        "left_knee": "right_knee",
        "left_ankle": "right_ankle",
    }
    for key, value in list(keypoint_flip_map.items()):
        keypoint_flip_map[value] = key
    keypoint_flip_map["nose"] = "nose"
    keypoint_flip_map_idx = []
    for source in keypoints:
        keypoint_flip_map_idx.append(keypoints.index(keypoint_flip_map[source]))
    return keypoint_flip_map_idx


def mask_decoder(x):
    mask = torch.from_numpy(np.array(Image.open(io.BytesIO(x)))).squeeze()[None]
    mask = mask > 0  # This fixes bug causing mask.float().max() == 255.
    return mask


def png_decoder(x):
    if PYSPNG_IMPORTED:
        return torch.from_numpy(np.rollaxis(pyspng.load(x), 2))
    with Image.open(io.BytesIO(x)) as im:
        im = torch.from_numpy(np.rollaxis(np.array(im.convert("RGB")), 2))
    return im


def jpg_decoder(x):
    with Image.open(io.BytesIO(x)) as im:
        im = torch.from_numpy(np.rollaxis(np.array(im.convert("RGB")), 2))
    return im


def get_num_workers(num_workers: int):
    n_cpus = multiprocessing.cpu_count()
    if num_workers > n_cpus:
        logger.warn(f"Setting the number of workers to match cpu count: {n_cpus}")
        return n_cpus
    return num_workers


def collate_fn(batch):
    elem = batch[0]
    ignore_keys = set(["embed_map", "vertx2cat"])
    batch_ = {
        key: default_collate([d[key] for d in batch])
        for key in elem
        if key not in ignore_keys
    }
    if "embed_map" in elem:
        batch_["embed_map"] = elem["embed_map"]
    if "vertx2cat" in elem:
        batch_["vertx2cat"] = elem["vertx2cat"]
    return batch_

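A short sketch of what get_coco_flipmap returns: for every keypoint index, the index whose value should take its place after a horizontal flip. The import path assumes dp2.data.utils is still reachable (e.g. from the deep_privacy2 submodule):

from dp2.data.utils import get_coco_keypoints, get_coco_flipmap

names = get_coco_keypoints()
flip_map = get_coco_flipmap()
# left_eye (1) and right_eye (2) swap, left/right ears swap, the nose maps to itself.
print(flip_map[:5])        # [0, 2, 1, 4, 3]
print(names[flip_map[1]])  # right_eye
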
dp2/detection/__init__.py
DELETED
@@ -1,3 +0,0 @@
from .cse_mask_face_detector import CSeMaskFaceDetector
from .person_detector import CSEPersonDetector
from .structures import PersonDetection, VehicleDetection, FaceDetection

dp2/detection/base.py
DELETED
@@ -1,45 +0,0 @@
import pickle
import torch
import lzma
from pathlib import Path
from tops import logger


class BaseDetector:

    def __init__(self, cache_directory: str) -> None:
        if cache_directory is not None:
            self.cache_directory = Path(cache_directory, str(self.__class__.__name__))
            self.cache_directory.mkdir(exist_ok=True, parents=True)

    def save_to_cache(self, detection, cache_path: Path, after_preprocess=True):
        logger.log(f"Caching detection to: {cache_path}")
        with lzma.open(cache_path, "wb") as fp:
            torch.save(
                [det.state_dict(after_preprocess=after_preprocess) for det in detection], fp,
                pickle_protocol=pickle.HIGHEST_PROTOCOL)

    def load_from_cache(self, cache_path: Path):
        logger.log(f"Loading detection from cache path: {cache_path}")
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp)
        return [
            state["cls"].from_state_dict(state_dict=state) for state in state_dict
        ]

    def forward_and_cache(self, im: torch.Tensor, cache_id: str, load_cache: bool):
        if cache_id is None:
            return self.forward(im)
        cache_path = self.cache_directory.joinpath(cache_id + ".torch")
        if cache_path.is_file() and load_cache:
            try:
                return self.load_from_cache(cache_path)
            except Exception as e:
                logger.warn(f"The cache file was corrupted: {cache_path}")
                exit()
        detections = self.forward(im)
        self.save_to_cache(detections, cache_path)
        return detections

dp2/detection/box_utils.py
DELETED
@@ -1,104 +0,0 @@
import numpy as np


def expand_bbox_to_ratio(bbox, imshape, target_aspect_ratio):
    x0, y0, x1, y1 = [int(_) for _ in bbox]
    h, w = y1 - y0, x1 - x0
    cur_ratio = h / w

    if cur_ratio == target_aspect_ratio:
        return [x0, y0, x1, y1]
    if cur_ratio < target_aspect_ratio:
        target_height = int(w*target_aspect_ratio)
        y0, y1 = expand_axis(y0, y1, target_height, imshape[0])
    else:
        target_width = int(h/target_aspect_ratio)
        x0, x1 = expand_axis(x0, x1, target_width, imshape[1])
    return x0, y0, x1, y1


def expand_axis(start, end, target_width, limit):
    # Can return a bbox outside of limit
    cur_width = end - start
    start = start - (target_width-cur_width)//2
    end = end + (target_width-cur_width)//2
    if end - start != target_width:
        end += 1
    assert end - start == target_width
    if start < 0 and end > limit:
        return start, end
    if start < 0 and end < limit:
        to_shift = min(0 - start, limit - end)
        start += to_shift
        end += to_shift
    if end > limit and start > 0:
        to_shift = min(end - limit, start)
        end -= to_shift
        start -= to_shift
    assert end - start == target_width
    return start, end


def expand_box(bbox, imshape, mask, percentage_background: float):
    assert isinstance(bbox[0], int)
    assert 0 < percentage_background < 1
    # Percentage in S
    mask_pixels = mask.long().sum().cpu()
    total_pixels = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
    percentage_mask = mask_pixels / total_pixels
    if (1 - percentage_mask) > percentage_background:
        return bbox
    target_pixels = mask_pixels / (1 - percentage_background)
    x0, y0, x1, y1 = bbox
    H = y1 - y0
    W = x1 - x0
    p = np.sqrt(target_pixels/(H*W))
    target_width = int(np.ceil(p * W))
    target_height = int(np.ceil(p * H))
    x0, x1 = expand_axis(x0, x1, target_width, imshape[1])
    y0, y1 = expand_axis(y0, y1, target_height, imshape[0])
    return [x0, y0, x1, y1]


def expand_axises_by_percentage(bbox_XYXY, imshape, percentage):
    x0, y0, x1, y1 = bbox_XYXY
    H = y1 - y0
    W = x1 - x0
    expansion = int(((H*W)**0.5) * percentage)
    new_width = W + expansion
    new_height = H + expansion
    x0, x1 = expand_axis(x0, x1, min(new_width, imshape[1]), imshape[1])
    y0, y1 = expand_axis(y0, y1, min(new_height, imshape[0]), imshape[0])
    return [x0, y0, x1, y1]


def get_expanded_bbox(
        bbox_XYXY,
        imshape,
        mask,
        percentage_background: float,
        axis_minimum_expansion: float,
        target_aspect_ratio: float):
    bbox_XYXY = bbox_XYXY.long().cpu().numpy().tolist()
    # Expand each axis of the bounding box by a minimum percentage
    bbox_XYXY = expand_axises_by_percentage(bbox_XYXY, imshape, axis_minimum_expansion)
    # Find the minimum bbox with the aspect ratio. Can be outside of imshape
    bbox_XYXY = expand_bbox_to_ratio(bbox_XYXY, imshape, target_aspect_ratio)
    # Expands square box such that X% of the bbox is background
    bbox_XYXY = expand_box(bbox_XYXY, imshape, mask, percentage_background)
    assert isinstance(bbox_XYXY[0], (int, np.int64))
    return bbox_XYXY


def include_box(bbox, minimum_area, aspect_ratio_range, min_bbox_ratio_inside, imshape):
    def area_inside_ratio(bbox, imshape):
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        area_inside = (min(bbox[2], imshape[1]) - max(0, bbox[0])) * (min(imshape[0], bbox[3]) - max(0, bbox[1]))
        return area_inside / area
    ratio = (bbox[3] - bbox[1]) / (bbox[2] - bbox[0])
    area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
    if area_inside_ratio(bbox, imshape) < min_bbox_ratio_inside:
        return False
    if ratio <= aspect_ratio_range[0] or ratio >= aspect_ratio_range[1] or area < minimum_area:
        return False
    return True

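An illustrative sketch of how the helpers above are combined, assuming the module is still importable from the deep_privacy2 submodule added in this commit; the box, mask and parameter values below are invented, not the repository's configs:

import torch
from dp2.detection.box_utils import get_expanded_bbox, include_box

imshape = (512, 512)                        # (H, W)
mask = torch.zeros(imshape, dtype=torch.bool)
mask[100:300, 150:250] = True               # fake person segmentation
bbox = torch.tensor([150, 100, 250, 300])   # XYXY box around the mask

exp_box = get_expanded_bbox(
    bbox, imshape, mask,
    percentage_background=0.3,
    axis_minimum_expansion=0.1,
    target_aspect_ratio=288 / 160)
keep = include_box(
    exp_box, minimum_area=32 * 32, aspect_ratio_range=(0, 99999),
    min_bbox_ratio_inside=0, imshape=imshape)
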
dp2/detection/box_utils_fdf.py
DELETED
@@ -1,203 +0,0 @@
"""
The FDF dataset expands bound boxes differently from what is used for CSE.
"""

import numpy as np


def quadratic_bounding_box(x0, y0, width, height, imshape):
    # We assume that we can create a image that is quadratic without
    # minimizing any of the sides
    assert width <= min(imshape[:2])
    assert height <= min(imshape[:2])
    min_side = min(height, width)
    if height != width:
        side_diff = abs(height - width)
        # Want to extend the shortest side
        if min_side == height:
            # Vertical side
            height += side_diff
            if height > imshape[0]:
                # Take full frame, and shrink width
                y0 = 0
                height = imshape[0]

                side_diff = abs(height - width)
                width -= side_diff
                x0 += side_diff // 2
            else:
                y0 -= side_diff // 2
                y0 = max(0, y0)
        else:
            # Horizontal side
            width += side_diff
            if width > imshape[1]:
                # Take full frame width, and shrink height
                x0 = 0
                width = imshape[1]

                side_diff = abs(height - width)
                height -= side_diff
                y0 += side_diff // 2
            else:
                x0 -= side_diff // 2
                x0 = max(0, x0)
    # Check that bbox goes outside image
    x1 = x0 + width
    y1 = y0 + height
    if imshape[1] < x1:
        diff = x1 - imshape[1]
        x0 -= diff
    if imshape[0] < y1:
        diff = y1 - imshape[0]
        y0 -= diff
    assert x0 >= 0, "Bounding box outside image."
    assert y0 >= 0, "Bounding box outside image."
    assert x0 + width <= imshape[1], "Bounding box outside image."
    assert y0 + height <= imshape[0], "Bounding box outside image."
    return x0, y0, width, height


def expand_bounding_box(bbox, percentage, imshape):
    orig_bbox = bbox.copy()
    x0, y0, x1, y1 = bbox
    width = x1 - x0
    height = y1 - y0
    x0, y0, width, height = quadratic_bounding_box(
        x0, y0, width, height, imshape)
    expanding_factor = int(max(height, width) * percentage)

    possible_max_expansion = [(imshape[0] - width) // 2,
                              (imshape[1] - height) // 2,
                              expanding_factor]

    expanding_factor = min(possible_max_expansion)
    # Expand height

    if expanding_factor > 0:

        y0 = y0 - expanding_factor
        y0 = max(0, y0)

        height += expanding_factor * 2
        if height > imshape[0]:
            y0 -= (imshape[0] - height)
            height = imshape[0]

        if height + y0 > imshape[0]:
            y0 -= (height + y0 - imshape[0])

        # Expand width
        x0 = x0 - expanding_factor
        x0 = max(0, x0)

        width += expanding_factor * 2
        if width > imshape[1]:
            x0 -= (imshape[1] - width)
            width = imshape[1]

        if width + x0 > imshape[1]:
            x0 -= (width + x0 - imshape[1])
    y1 = y0 + height
    x1 = x0 + width
    assert y0 >= 0, "Y0 is minus"
    assert height <= imshape[0], "Height is larger than image."
    assert x0 + width <= imshape[1]
    assert y0 + height <= imshape[0]
    assert width == height, "HEIGHT IS NOT EQUAL WIDTH!!"
    assert x0 >= 0, "Y0 is minus"
    assert width <= imshape[1], "Height is larger than image."
    # Check that original bbox is within new
    x0_o, y0_o, x1_o, y1_o = orig_bbox
    assert x0 <= x0_o, f"New bbox is outisde of original. O:{x0_o}, N: {x0}"
    assert x1 >= x1_o, f"New bbox is outisde of original. O:{x1_o}, N: {x1}"
    assert y0 <= y0_o, f"New bbox is outisde of original. O:{y0_o}, N: {y0}"
    assert y1 >= y1_o, f"New bbox is outisde of original. O:{y1_o}, N: {y1}"

    x0, y0, width, height = [int(_) for _ in [x0, y0, width, height]]
    x1 = x0 + width
    y1 = y0 + height
    return np.array([x0, y0, x1, y1])


def is_keypoint_within_bbox(x0, y0, x1, y1, keypoint):
    keypoint = keypoint[:, :3]  # only nose + eyes are relevant
    kp_X = keypoint[0, :]
    kp_Y = keypoint[1, :]
    within_X = np.all(kp_X >= x0) and np.all(kp_X <= x1)
    within_Y = np.all(kp_Y >= y0) and np.all(kp_Y <= y1)
    return within_X and within_Y


def expand_bbox_simple(bbox, percentage):
    x0, y0, x1, y1 = bbox.astype(float)
    width = x1 - x0
    height = y1 - y0
    x_c = int(x0) + width // 2
    y_c = int(y0) + height // 2
    avg_size = max(width, height)
    new_width = avg_size * (1 + percentage)
    x0 = x_c - new_width // 2
    y0 = y_c - new_width // 2
    x1 = x_c + new_width // 2
    y1 = y_c + new_width // 2
    return np.array([x0, y0, x1, y1]).astype(int)


def pad_image(im, bbox, pad_value):
    x0, y0, x1, y1 = bbox
    if x0 < 0:
        pad_im = np.zeros((im.shape[0], abs(x0), im.shape[2]),
                          dtype=np.uint8) + pad_value
        im = np.concatenate((pad_im, im), axis=1)
        x1 += abs(x0)
        x0 = 0
    if y0 < 0:
        pad_im = np.zeros((abs(y0), im.shape[1], im.shape[2]),
                          dtype=np.uint8) + pad_value
        im = np.concatenate((pad_im, im), axis=0)
        y1 += abs(y0)
        y0 = 0
    if x1 >= im.shape[1]:
        pad_im = np.zeros(
            (im.shape[0], x1 - im.shape[1] + 1, im.shape[2]),
            dtype=np.uint8) + pad_value
        im = np.concatenate((im, pad_im), axis=1)
    if y1 >= im.shape[0]:
        pad_im = np.zeros(
            (y1 - im.shape[0] + 1, im.shape[1], im.shape[2]),
            dtype=np.uint8) + pad_value
        im = np.concatenate((im, pad_im), axis=0)
    return im[y0:y1, x0:x1]


def clip_box(bbox, im):
    bbox[0] = max(0, bbox[0])
    bbox[1] = max(0, bbox[1])
    bbox[2] = min(im.shape[1] - 1, bbox[2])
    bbox[3] = min(im.shape[0] - 1, bbox[3])
    return bbox


def cut_face(im, bbox, simple_expand=False, pad_value=0, pad_im=True):
    outside_im = (bbox < 0).any() or bbox[2] > im.shape[1] or bbox[3] > im.shape[0]
    if simple_expand or (outside_im and pad_im):
        return pad_image(im, bbox, pad_value)
    bbox = clip_box(bbox, im)
    x0, y0, x1, y1 = bbox
    return im[y0:y1, x0:x1]


def expand_bbox(
        bbox_ltrb, imshape, simple_expand, default_to_simple=False,
        expansion_factor=0.35):
    assert bbox_ltrb.shape == (4,), f"BBox shape was: {bbox.shape}"
    bbox = bbox_ltrb.astype(float)
    # FDF256 uses simple expand with ratio 0.4
    if simple_expand:
        return expand_bbox_simple(bbox, 0.4)
    try:
        return expand_bounding_box(bbox, expansion_factor, imshape)
    except AssertionError:
        return expand_bbox_simple(bbox, expansion_factor * 2)

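A small usage sketch of the FDF-style expansion above, with a dummy HWC uint8 image and an invented face box:

import numpy as np
from dp2.detection.box_utils_fdf import expand_bbox, cut_face

im = np.zeros((480, 640, 3), dtype=np.uint8)
face_box = np.array([300, 200, 360, 280])               # XYXY (ltrb)
exp_box = expand_bbox(face_box, im.shape[:2], simple_expand=True)
face_crop = cut_face(im, exp_box, simple_expand=True)   # zero-padded if the box leaves the image
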
dp2/detection/cse_mask_face_detector.py
DELETED
@@ -1,116 +0,0 @@
import torch
import lzma
import tops
from pathlib import Path
from dp2.detection.base import BaseDetector
from .utils import combine_cse_maskrcnn_dets
from face_detection import build_detector as build_face_detector
from .models.cse import CSEDetector
from .models.mask_rcnn import MaskRCNNDetector
from .structures import CSEPersonDetection, VehicleDetection, FaceDetection, PersonDetection
from tops import logger


def box1_inside_box2(box1: torch.Tensor, box2: torch.Tensor):
    assert len(box1.shape) == 2
    assert len(box2.shape) == 2
    box1_inside = torch.zeros(box1.shape[0], device=box1.device, dtype=torch.bool)
    # This can be batched
    for i, box in enumerate(box1):
        is_outside_lefttop = (box[None, [0, 1]] <= box2[:, [0, 1]]).any(dim=1)
        is_outside_rightbot = (box[None, [2, 3]] >= box2[:, [2, 3]]).any(dim=1)
        is_outside = is_outside_lefttop.logical_or(is_outside_rightbot)
        box1_inside[i] = is_outside.logical_not().any()
    return box1_inside


class CSeMaskFaceDetector(BaseDetector):

    def __init__(
            self,
            mask_rcnn_cfg,
            face_detector_cfg: dict,
            cse_cfg: dict,
            face_post_process_cfg: dict,
            cse_post_process_cfg,
            score_threshold: float,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.mask_rcnn = MaskRCNNDetector(**mask_rcnn_cfg, score_thres=score_threshold)
        if "confidence_threshold" not in face_detector_cfg:
            face_detector_cfg["confidence_threshold"] = score_threshold
        if "score_thres" not in cse_cfg:
            cse_cfg["score_thres"] = score_threshold
        self.cse_detector = CSEDetector(**cse_cfg)
        self.face_detector = build_face_detector(**face_detector_cfg, clip_boxes=True)
        self.cse_post_process_cfg = cse_post_process_cfg
        self.face_mean = tops.to_cuda(torch.from_numpy(self.face_detector.mean).view(3, 1, 1))
        self.mask_cse_iou_combine_threshold = self.cse_post_process_cfg.pop("iou_combine_threshold")
        self.face_post_process_cfg = face_post_process_cfg

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def _detect_faces(self, im: torch.Tensor):
        H, W = im.shape[1:]
        im = im.float() - self.face_mean
        im = self.face_detector.resize(im[None], 1.0)
        boxes_XYXY = self.face_detector._batched_detect(im)[0][:, :-1]  # Remove score
        boxes_XYXY[:, [0, 2]] *= W
        boxes_XYXY[:, [1, 3]] *= H
        return boxes_XYXY.round().long()

    def load_from_cache(self, cache_path: Path):
        logger.log(f"Loading detection from cache path: {cache_path}",)
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp, map_location="cpu")
        kwargs = dict(
            post_process_cfg=self.cse_post_process_cfg,
            embed_map=self.cse_detector.embed_map,
            **self.face_post_process_cfg
        )
        return [
            state["cls"].from_state_dict(**kwargs, state_dict=state)
            for state in state_dict
        ]

    @torch.no_grad()
    def forward(self, im: torch.Tensor):
        maskrcnn_dets = self.mask_rcnn(im)
        cse_dets = self.cse_detector(im)
        embed_map = self.cse_detector.embed_map
        print("Calling face detector.")
        face_boxes = self._detect_faces(im).cpu()
        maskrcnn_person = {
            k: v[maskrcnn_dets["is_person"]] for k, v in maskrcnn_dets.items()
        }
        maskrcnn_other = {
            k: v[maskrcnn_dets["is_person"].logical_not()] for k, v in maskrcnn_dets.items()
        }
        maskrcnn_other = VehicleDetection(maskrcnn_other["segmentation"])
        combined_segmentation, cse_dets, matches = combine_cse_maskrcnn_dets(
            maskrcnn_person["segmentation"], cse_dets, self.mask_cse_iou_combine_threshold)

        persons_with_cse = CSEPersonDetection(
            combined_segmentation, cse_dets, **self.cse_post_process_cfg,
            embed_map=embed_map, orig_imshape_CHW=im.shape
        )
        persons_with_cse.pre_process()
        not_matched = [i for i in range(maskrcnn_person["segmentation"].shape[0]) if i not in matches[:, 0]]
        persons_without_cse = PersonDetection(
            maskrcnn_person["segmentation"][not_matched], **self.cse_post_process_cfg,
            orig_imshape_CHW=im.shape
        )
        persons_without_cse.pre_process()

        face_boxes_covered = box1_inside_box2(face_boxes, persons_with_cse.dilated_boxes).logical_or(
            box1_inside_box2(face_boxes, persons_without_cse.dilated_boxes)
        )
        face_boxes = face_boxes[face_boxes_covered.logical_not()]
        face_boxes = FaceDetection(face_boxes, **self.face_post_process_cfg)

        # Order matters. The anonymizer will anonymize FIFO.
        # Later detections will overwrite.
        all_detections = [face_boxes, maskrcnn_other, persons_without_cse, persons_with_cse]
        return all_detections

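A toy check of the box1_inside_box2 helper defined above: a face box counts as covered only when it lies fully inside some person box (the boxes are invented):

import torch

face_boxes = torch.tensor([[110, 110, 150, 160],    # inside the person box
                           [400, 400, 450, 460]])   # outside every person box
person_boxes = torch.tensor([[100, 100, 300, 400]])
covered = box1_inside_box2(face_boxes, person_boxes)
# covered -> tensor([True, False]); only uncovered faces are kept as FaceDetection
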
dp2/detection/face_detector.py
DELETED
@@ -1,62 +0,0 @@
import torch
import lzma
import tops
from pathlib import Path
from dp2.detection.base import BaseDetector
from face_detection import build_detector as build_face_detector
from .structures import FaceDetection
from tops import logger


def box1_inside_box2(box1: torch.Tensor, box2: torch.Tensor):
    assert len(box1.shape) == 2
    assert len(box2.shape) == 2
    box1_inside = torch.zeros(box1.shape[0], device=box1.device, dtype=torch.bool)
    # This can be batched
    for i, box in enumerate(box1):
        is_outside_lefttop = (box[None, [0, 1]] <= box2[:, [0, 1]]).any(dim=1)
        is_outside_rightbot = (box[None, [2, 3]] >= box2[:, [2, 3]]).any(dim=1)
        is_outside = is_outside_lefttop.logical_or(is_outside_rightbot)
        box1_inside[i] = is_outside.logical_not().any()
    return box1_inside


class FaceDetector(BaseDetector):

    def __init__(
            self,
            face_detector_cfg: dict,
            score_threshold: float,
            face_post_process_cfg: dict,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.face_detector = build_face_detector(**face_detector_cfg, confidence_threshold=score_threshold)
        self.face_mean = tops.to_cuda(torch.from_numpy(self.face_detector.mean).view(3, 1, 1))
        self.face_post_process_cfg = face_post_process_cfg

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def _detect_faces(self, im: torch.Tensor):
        H, W = im.shape[1:]
        im = im.float() - self.face_mean
        im = self.face_detector.resize(im[None], 1.0)
        boxes_XYXY = self.face_detector._batched_detect(im)[0][:, :-1]  # Remove score
        boxes_XYXY[:, [0, 2]] *= W
        boxes_XYXY[:, [1, 3]] *= H
        return boxes_XYXY.round().long().cpu()

    @torch.no_grad()
    def forward(self, im: torch.Tensor):
        face_boxes = self._detect_faces(im)
        face_boxes = FaceDetection(face_boxes, **self.face_post_process_cfg)
        return [face_boxes]

    def load_from_cache(self, cache_path: Path):
        logger.log(f"Loading detection from cache path: {cache_path}")
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp)
        return [
            state["cls"].from_state_dict(state_dict=state, **self.face_post_process_cfg) for state in state_dict
        ]

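A hypothetical wiring of the face-only detector above; the config keys and values below are guesses for illustration, not the repository's configs, and the example assumes the face_detection package and a GPU are available:

import torch
from dp2.detection.face_detector import FaceDetector

detector = FaceDetector(
    face_detector_cfg=dict(name="DSFDDetector"),    # assumed detector name
    score_threshold=0.3,
    face_post_process_cfg=dict(target_imsize=(256, 256), fdf128_expand=False),
    cache_directory=None,                           # assumed BaseDetector kwarg
)
faces = detector(torch.zeros((3, 480, 640), dtype=torch.uint8))
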
dp2/detection/models/__init__.py
DELETED
File without changes
dp2/detection/models/cse.py
DELETED
@@ -1,135 +0,0 @@
import torch
from typing import List
import tops
from torchvision.transforms.functional import InterpolationMode, resize
from densepose.data.utils import get_class_to_mesh_name_mapping
from densepose import add_densepose_config
from densepose.structures import DensePoseEmbeddingPredictorOutput
from densepose.vis.extractor import DensePoseOutputsExtractor
from densepose.modeling import build_densepose_embedder
from detectron2.config import get_cfg
from detectron2.data.transforms import ResizeShortestEdge
from detectron2.checkpoint.detection_checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model


model_urls = {
    "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml": "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x/250713061/model_final_1d3314.pkl",
    "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/densepose_rcnn_R_50_FPN_s1x.yaml": "https://dl.fbaipublicfiles.com/densepose/cse/densepose_rcnn_R_50_FPN_s1x/251155172/model_final_c4ea5f.pkl",
}


def cse_det_to_global(boxes_XYXY, S: torch.Tensor, imshape):
    assert len(S.shape) == 3
    H, W = imshape
    N = len(boxes_XYXY)
    segmentation = torch.zeros((N, H, W), dtype=torch.bool, device=S.device)
    boxes_XYXY = boxes_XYXY.long()
    for i in range(N):
        x0, y0, x1, y1 = boxes_XYXY[i]
        assert x0 >= 0 and y0 >= 0
        assert x1 <= imshape[1]
        assert y1 <= imshape[0]
        h = y1 - y0
        w = x1 - x0
        segmentation[i:i+1, y0:y1, x0:x1] = resize(S[i:i+1], (h, w), interpolation=InterpolationMode.NEAREST) > 0
    return segmentation


class CSEDetector:

    def __init__(
            self,
            cfg_url: str = "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml",
            cfg_2_download: List[str] = [
                "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/densepose_rcnn_R_101_FPN_DL_soft_s1x.yaml",
                "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN.yaml",
                "https://raw.githubusercontent.com/facebookresearch/detectron2/main/projects/DensePose/configs/cse/Base-DensePose-RCNN-FPN-Human.yaml"],
            score_thres: float = 0.9,
            nms_thresh: float = None,
    ) -> None:
        with tops.logger.capture_log_stdout():
            cfg = get_cfg()
            self.device = tops.get_device()
            add_densepose_config(cfg)
        cfg_path = tops.download_file(cfg_url)
        for p in cfg_2_download:
            tops.download_file(p)
        with tops.logger.capture_log_stdout():
            cfg.merge_from_file(cfg_path)
        assert cfg_url in model_urls, cfg_url
        model_path = tops.download_file(model_urls[cfg_url])
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thres
        if nms_thresh is not None:
            cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = nms_thresh
        cfg.MODEL.WEIGHTS = str(model_path)
        cfg.MODEL.DEVICE = str(self.device)
        cfg.freeze()
        with tops.logger.capture_log_stdout():
            self.model = build_model(cfg)
            self.model.eval()
        DetectionCheckpointer(self.model).load(str(model_path))
        self.input_format = cfg.INPUT.FORMAT
        self.densepose_extractor = DensePoseOutputsExtractor()
        self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)

        self.embedder = build_densepose_embedder(cfg)
        self.mesh_vertex_embeddings = {
            mesh_name: self.embedder(mesh_name).to(self.device)
            for mesh_name in self.class_to_mesh_name.values()
            if self.embedder.has_embeddings(mesh_name)
        }
        self.cfg = cfg
        self.embed_map = self.mesh_vertex_embeddings["smpl_27554"]
        tops.logger.log("CSEDetector built.")

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def resize_im(self, im):
        H, W = im.shape[1:]
        newH, newW = ResizeShortestEdge.get_output_shape(
            H, W, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST)
        return resize(
            im, (newH, newW), InterpolationMode.BILINEAR, antialias=True)

    @torch.no_grad()
    def forward(self, im):
        assert im.dtype == torch.uint8
        if self.input_format == "BGR":
            im = im.flip(0)
        H, W = im.shape[1:]
        im = self.resize_im(im)
        output = self.model([{"image": im, "height": H, "width": W}])[0]["instances"]
        scores = output.get("scores")
        if len(scores) == 0:
            return dict(
                instance_segmentation=torch.empty((0, 0, 112, 112), dtype=torch.bool, device=im.device),
                instance_embedding=torch.empty((0, 16, 112, 112), dtype=torch.float32, device=im.device),
                embed_map=self.mesh_vertex_embeddings["smpl_27554"],
                bbox_XYXY=torch.empty((0, 4), dtype=torch.long, device=im.device),
                im_segmentation=torch.empty((0, H, W), dtype=torch.bool, device=im.device),
                scores=torch.empty((0), dtype=torch.float, device=im.device)
            )
        pred_densepose, boxes_xywh, classes = self.densepose_extractor(output)
        assert isinstance(pred_densepose, DensePoseEmbeddingPredictorOutput), pred_densepose
        S = pred_densepose.coarse_segm.argmax(dim=1)  # Segmentation channel Nx2xHxW (2 because only 2 classes)
        E = pred_densepose.embedding
        mesh_name = self.class_to_mesh_name[classes[0]]
        assert mesh_name == "smpl_27554"
        x0, y0, w, h = [boxes_xywh[:, i] for i in range(4)]
        boxes_XYXY = torch.stack((x0, y0, x0+w, y0+h), dim=-1)
        boxes_XYXY = boxes_XYXY.round_().long()

        non_empty_boxes = (boxes_XYXY[:, :2] == boxes_XYXY[:, 2:]).any(dim=1).logical_not()
        S = S[non_empty_boxes]
        E = E[non_empty_boxes]
        boxes_XYXY = boxes_XYXY[non_empty_boxes]
        scores = scores[non_empty_boxes]
        im_segmentation = cse_det_to_global(boxes_XYXY, S, [H, W])
        return dict(
            instance_segmentation=S, instance_embedding=E,
            bbox_XYXY=boxes_XYXY,
            im_segmentation=im_segmentation,
            scores=scores.view(-1))

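A toy illustration of cse_det_to_global above: each per-instance 112x112 coarse segmentation is pasted back into a full-image boolean mask at its box (shapes and values invented):

import torch

boxes = torch.tensor([[10, 20, 110, 220]])      # one detection, XYXY
S = torch.ones((1, 112, 112))                   # its coarse segmentation
full = cse_det_to_global(boxes, S, imshape=(256, 256))
# full.shape == (1, 256, 256); True inside the box, False elsewhere
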
dp2/detection/models/keypoint_maskrcnn.py
DELETED
@@ -1,111 +0,0 @@
import numpy as np
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling.roi_heads import CascadeROIHeads, StandardROIHeads
from detectron2.data.transforms import ResizeShortestEdge
from detectron2.structures import Instances
from detectron2 import model_zoo
from detectron2.config import instantiate
from detectron2.config import LazyCall as L
from PIL import Image
import tops
import functools
from torchvision.transforms.functional import resize


def get_rn50_fpn_keypoint_rcnn(weight_path: str):
    from detectron2.modeling.poolers import ROIPooler
    from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead
    from detectron2.layers import ShapeSpec
    model = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model
    model.roi_heads.update(
        num_classes=1,
        keypoint_in_features=["p2", "p3", "p4", "p5"],
        keypoint_pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_keypoints=17,
            conv_dims=[512] * 8,
            loss_normalizer="visible",
        ),
    )

    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
    # 1000 proposals per-image is found to hurt box AP.
    # Therefore we increase it to 1500 per-image.
    model.proposal_generator.post_nms_topk = (1500, 1000)

    # Keypoint AP degrades (though box AP improves) when using plain L1 loss
    model.roi_heads.box_predictor.smooth_l1_beta = 0.5
    model = instantiate(model)

    dataloader = model_zoo.get_config("common/data/coco_keypoint.py").dataloader
    test_transform = instantiate(dataloader.test.mapper.augmentations)
    DetectionCheckpointer(model).load(weight_path)
    return model, test_transform


models = {
    "rn50_fpn_maskrcnn": functools.partial(get_rn50_fpn_keypoint_rcnn, weight_path="https://folk.ntnu.no/haakohu/checkpoints/maskrcnn_keypoint/keypoint_maskrcnn_R_50_FPN_1x.pth")
}


class KeypointMaskRCNN:

    def __init__(self, model_name: str, score_threshold: float) -> None:
        assert model_name in models, f"Did not find {model_name} in models"
        model, test_transform = models[model_name]()
        self.model = model.eval().to(tops.get_device())
        if isinstance(self.model.roi_heads, CascadeROIHeads):
            for head in self.model.roi_heads.box_predictors:
                assert hasattr(head, "test_score_thresh")
                head.test_score_thresh = score_threshold
        else:
            assert isinstance(self.model.roi_heads, StandardROIHeads)
            assert hasattr(self.model.roi_heads.box_predictor, "test_score_thresh")
            self.model.roi_heads.box_predictor.test_score_thresh = score_threshold

        self.test_transform = test_transform
        assert len(self.test_transform) == 1
        self.test_transform = self.test_transform[0]
        assert isinstance(self.test_transform, ResizeShortestEdge)
        assert self.test_transform.interp == Image.BILINEAR
        self.image_format = self.model.input_format

    def resize_im(self, im):
        H, W = im.shape[-2:]
        if self.test_transform.is_range:
            size = np.random.randint(self.test_transform.short_edge_length[0], self.test_transform.short_edge_length[1] + 1)
        else:
            size = np.random.choice(self.test_transform.short_edge_length)
        newH, newW = ResizeShortestEdge.get_output_shape(H, W, size, self.test_transform.max_size)
        return resize(
            im, (newH, newW), antialias=True)

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    @torch.no_grad()
    def forward(self, im: torch.Tensor) -> Instances:
        assert im.ndim == 3
        if self.image_format == "BGR":
            im = im.flip(0)
        H, W = im.shape[-2:]
        im = self.resize_im(im)
        im = im.float()
        inputs = dict(image=im, height=H, width=W)
        # instances contains
        # dict_keys(['pred_boxes', 'scores', 'pred_classes', 'pred_masks', 'pred_keypoints', 'pred_keypoint_heatmaps'])
        instances = self.model([inputs])[0]["instances"]
        return dict(
            scores=instances.get("scores").cpu(),
            segmentation=instances.get("pred_masks").cpu(),
            keypoints=instances.get("pred_keypoints").cpu()
        )

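A hypothetical usage sketch for the keypoint detector above (requires detectron2 and the checkpoint URL above to resolve; the image is a dummy):

import torch

model = KeypointMaskRCNN("rn50_fpn_maskrcnn", score_threshold=0.5)
im = torch.zeros((3, 480, 640), dtype=torch.uint8)   # CHW uint8
out = model(im)   # dict with "scores", "segmentation", "keypoints", all on CPU
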
dp2/detection/models/mask_rcnn.py
DELETED
@@ -1,78 +0,0 @@
import torch
import tops
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.structures import Boxes
from detectron2.data import MetadataCatalog
from detectron2 import model_zoo
from typing import Dict
from detectron2.data.transforms import ResizeShortestEdge
from torchvision.transforms.functional import resize


model_urls = {
    "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x/139653917/model_final_2d9806.pkl",
}


class MaskRCNNDetector:

    def __init__(
            self,
            cfg_name: str = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml",
            score_thres: float = 0.9,
            class_filter=["person"],  # ["car", "bicycle","truck", "bus", "backpack"]
            fp16_inference: bool = False
    ) -> None:
        cfg = model_zoo.get_config(cfg_name)
        cfg.MODEL.DEVICE = str(tops.get_device())
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thres
        cfg.freeze()
        self.cfg = cfg
        with tops.logger.capture_log_stdout():
            self.model = build_model(cfg)
            DetectionCheckpointer(self.model).load(model_urls[cfg_name])
        self.model.eval()
        self.input_format = cfg.INPUT.FORMAT
        self.class_names = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes
        self.class_to_keep = set([self.class_names.index(cls_) for cls_ in class_filter])
        self.person_class = self.class_names.index("person")
        self.fp16_inference = fp16_inference
        tops.logger.log("Mask R-CNN built.")

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def resize_im(self, im):
        H, W = im.shape[1:]
        newH, newW = ResizeShortestEdge.get_output_shape(
            H, W, self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MAX_SIZE_TEST)
        return resize(
            im, (newH, newW), antialias=True)

    @torch.no_grad()
    def forward(self, im: torch.Tensor):
        if self.input_format == "BGR":
            im = im.flip(0)
        else:
            assert self.input_format == "RGB"
        H, W = im.shape[-2:]
        im = self.resize_im(im)
        with torch.cuda.amp.autocast(enabled=self.fp16_inference):
            output = self.model([{"image": im, "height": H, "width": W}])[0]["instances"]
        scores = output.get("scores")
        N = len(scores)
        classes = output.get("pred_classes")
        idx2keep = [i for i in range(N) if classes[i].tolist() in self.class_to_keep]
        classes = classes[idx2keep]
        assert isinstance(output.get("pred_boxes"), Boxes)
        segmentation = output.get("pred_masks")[idx2keep]
        assert segmentation.dtype == torch.bool
        is_person = classes == self.person_class
        return {
            "scores": output.get("scores")[idx2keep],
            "segmentation": segmentation,
            "classes": output.get("pred_classes")[idx2keep],
            "is_person": is_person
        }

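A hypothetical usage sketch for the Mask R-CNN wrapper above (weights are downloaded on construction and detectron2 is required; the image is a dummy):

import torch

detector = MaskRCNNDetector(score_thres=0.7)          # keeps only "person" by default
im = torch.zeros((3, 480, 640), dtype=torch.uint8)    # CHW uint8 image
out = detector(im)
# out["segmentation"]: boolean instance masks, out["is_person"]: per-detection flags
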
dp2/detection/person_detector.py
DELETED
@@ -1,135 +0,0 @@
import torch
import lzma
from dp2.detection.base import BaseDetector
from .utils import combine_cse_maskrcnn_dets
from .models.cse import CSEDetector
from .models.mask_rcnn import MaskRCNNDetector
from .models.keypoint_maskrcnn import KeypointMaskRCNN
from .structures import CSEPersonDetection, PersonDetection
from pathlib import Path


class CSEPersonDetector(BaseDetector):
    def __init__(
            self,
            score_threshold: float,
            mask_rcnn_cfg: dict,
            cse_cfg: dict,
            cse_post_process_cfg: dict,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.mask_rcnn = MaskRCNNDetector(**mask_rcnn_cfg, score_thres=score_threshold)
        self.cse_detector = CSEDetector(**cse_cfg, score_thres=score_threshold)
        self.post_process_cfg = cse_post_process_cfg
        self.iou_combine_threshold = self.post_process_cfg.pop("iou_combine_threshold")

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def load_from_cache(self, cache_path: Path):
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp)
        kwargs = dict(
            post_process_cfg=self.post_process_cfg,
            embed_map=self.cse_detector.embed_map,
        )
        return [
            state["cls"].from_state_dict(**kwargs, state_dict=state)
            for state in state_dict
        ]

    @torch.no_grad()
    def forward(self, im: torch.Tensor, cse_dets=None):
        mask_dets = self.mask_rcnn(im)
        if cse_dets is None:
            cse_dets = self.cse_detector(im)
        segmentation = mask_dets["segmentation"]
        segmentation, cse_dets, _ = combine_cse_maskrcnn_dets(
            segmentation, cse_dets, self.iou_combine_threshold
        )
        det = CSEPersonDetection(
            segmentation=segmentation,
            cse_dets=cse_dets,
            embed_map=self.cse_detector.embed_map,
            orig_imshape_CHW=im.shape,
            **self.post_process_cfg
        )
        return [det]


class MaskRCNNPersonDetector(BaseDetector):
    def __init__(
            self,
            score_threshold: float,
            mask_rcnn_cfg: dict,
            cse_post_process_cfg: dict,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.mask_rcnn = MaskRCNNDetector(**mask_rcnn_cfg, score_thres=score_threshold)
        self.post_process_cfg = cse_post_process_cfg

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def load_from_cache(self, cache_path: Path):
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp)
        kwargs = dict(
            post_process_cfg=self.post_process_cfg,
        )
        return [
            state["cls"].from_state_dict(**kwargs, state_dict=state)
            for state in state_dict
        ]

    @torch.no_grad()
    def forward(self, im: torch.Tensor):
        mask_dets = self.mask_rcnn(im)
        segmentation = mask_dets["segmentation"]
        det = PersonDetection(
            segmentation, **self.post_process_cfg, orig_imshape_CHW=im.shape
        )
        return [det]


class KeypointMaskRCNNPersonDetector(BaseDetector):
    def __init__(
            self,
            score_threshold: float,
            mask_rcnn_cfg: dict,
            cse_post_process_cfg: dict,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.mask_rcnn = KeypointMaskRCNN(
            **mask_rcnn_cfg, score_threshold=score_threshold
        )
        self.post_process_cfg = cse_post_process_cfg

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def load_from_cache(self, cache_path: Path):
        with lzma.open(cache_path, "rb") as fp:
            state_dict = torch.load(fp)
        kwargs = dict(
            post_process_cfg=self.post_process_cfg,
        )
        return [
            state["cls"].from_state_dict(**kwargs, state_dict=state)
            for state in state_dict
        ]

    @torch.no_grad()
    def forward(self, im: torch.Tensor):
        mask_dets = self.mask_rcnn(im)
        segmentation = mask_dets["segmentation"]
        det = PersonDetection(
            segmentation,
            **self.post_process_cfg,
            orig_imshape_CHW=im.shape,
            keypoints=mask_dets["keypoints"]
        )
        return [det]

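A sketch of how a person detector above could be configured; every value below is a guess for illustration only, not the repository's config files:

post_process_cfg = dict(
    target_imsize=(288, 160),
    exp_bbox_cfg=dict(percentage_background=0.3, axis_minimum_expansion=0.1),
    exp_bbox_filter=dict(minimum_area=32 * 32, aspect_ratio_range=(0, 99999), min_bbox_ratio_inside=0),
    dilation_percentage=0.02,
)
detector = MaskRCNNPersonDetector(
    score_threshold=0.3,
    mask_rcnn_cfg=dict(),
    cse_post_process_cfg=post_process_cfg,
    cache_directory=None,   # assumed BaseDetector kwarg, see dp2/detection/base.py
)
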
dp2/detection/structures.py
DELETED
@@ -1,464 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import numpy as np
|
3 |
-
from dp2 import utils
|
4 |
-
from dp2.utils import vis_utils, crop_box
|
5 |
-
from .utils import (
|
6 |
-
cut_pad_resize, masks_to_boxes,
|
7 |
-
get_kernel, transform_embedding, initialize_cse_boxes
|
8 |
-
)
|
9 |
-
from .box_utils import get_expanded_bbox, include_box
|
10 |
-
import torchvision
|
11 |
-
import tops
|
12 |
-
from .box_utils_fdf import expand_bbox as expand_bbox_fdf
|
13 |
-
|
14 |
-
|
15 |
-
class VehicleDetection:
|
16 |
-
|
17 |
-
def __init__(self, segmentation: torch.BoolTensor) -> None:
|
18 |
-
self.segmentation = segmentation
|
19 |
-
self.boxes = masks_to_boxes(segmentation)
|
20 |
-
assert self.boxes.shape[1] == 4, self.boxes.shape
|
21 |
-
self.n_detections = self.segmentation.shape[0]
|
22 |
-
area = (self.boxes[:, 3] - self.boxes[:, 1]) * (self.boxes[:, 2] - self.boxes[:, 0])
|
23 |
-
|
24 |
-
sorted_idx = torch.argsort(area, descending=True)
|
25 |
-
self.segmentation = self.segmentation[sorted_idx]
|
26 |
-
self.boxes = self.boxes[sorted_idx].cpu()
|
27 |
-
|
28 |
-
def pre_process(self):
|
29 |
-
pass
|
30 |
-
|
31 |
-
def get_crop(self, idx: int, im):
|
32 |
-
assert idx < len(self)
|
33 |
-
box = self.boxes[idx]
|
34 |
-
im = crop_box(self.im, box)
|
35 |
-
mask = crop_box(self.segmentation[idx])
|
36 |
-
mask = mask == 0
|
37 |
-
return dict(img=im, mask=mask.float(), boxes=box)
|
38 |
-
|
39 |
-
def visualize(self, im):
|
40 |
-
if len(self) == 0:
|
41 |
-
return im
|
42 |
-
im = vis_utils.draw_mask(im.clone(), self.segmentation.logical_not())
|
43 |
-
return im
|
44 |
-
|
45 |
-
def __len__(self):
|
46 |
-
return self.n_detections
|
47 |
-
|
48 |
-
@staticmethod
|
49 |
-
def from_state_dict(state_dict, **kwargs):
|
50 |
-
numel = np.prod(state_dict["shape"])
|
51 |
-
arr = np.unpackbits(state_dict["segmentation"].numpy(), count=numel)
|
52 |
-
segmentation = tops.to_cuda(torch.from_numpy(arr)).view(state_dict["shape"])
|
53 |
-
return VehicleDetection(segmentation)
|
54 |
-
|
55 |
-
def state_dict(self, **kwargs):
|
56 |
-
segmentation = torch.from_numpy(np.packbits(self.segmentation.bool().cpu().numpy()))
|
57 |
-
return dict(segmentation=segmentation, cls=self.__class__, shape=self.segmentation.shape)
|
58 |
-
|
59 |
-
|
60 |
-
class FaceDetection:
|
61 |
-
|
62 |
-
def __init__(self, boxes_ltrb: torch.LongTensor, target_imsize, fdf128_expand: bool, **kwargs) -> None:
|
63 |
-
self.boxes = boxes_ltrb.cpu()
|
64 |
-
assert self.boxes.shape[1] == 4, self.boxes.shape
|
65 |
-
self.target_imsize = tuple(target_imsize)
|
66 |
-
# Sory by area to paste in largest faces last
|
67 |
-
area = (self.boxes[:, 2] - self.boxes[:, 0]) * (self.boxes[:, 3] - self.boxes[:, 1]).view(-1)
|
68 |
-
idx = area.argsort(descending=False)
|
69 |
-
self.boxes = self.boxes[idx]
|
70 |
-
self.fdf128_expand = fdf128_expand
|
71 |
-
|
72 |
-
def visualize(self, im):
|
73 |
-
if len(self) == 0:
|
74 |
-
return im
|
75 |
-
orig_device = im.device
|
76 |
-
for box in self.boxes:
|
77 |
-
simple_expand = False if self.fdf128_expand else True
|
78 |
-
e_box = torch.from_numpy(expand_bbox_fdf(box.numpy(), im.shape[-2:], simple_expand))
|
79 |
-
im = torchvision.utils.draw_bounding_boxes(im.cpu(), e_box[None], colors=(0, 0, 255), width=2)
|
80 |
-
im = torchvision.utils.draw_bounding_boxes(im.cpu(), self.boxes, colors=(255, 0, 0), width=2)
|
81 |
-
|
82 |
-
return im.to(device=orig_device)
|
83 |
-
|
84 |
-
def get_crop(self, idx: int, im):
|
85 |
-
assert idx < len(self)
|
86 |
-
box = self.boxes[idx].numpy()
|
87 |
-
simple_expand = False if self.fdf128_expand else True
|
88 |
-
expanded_boxes = expand_bbox_fdf(box, im.shape[-2:], simple_expand=simple_expand)
|
89 |
-
im = cut_pad_resize(im, expanded_boxes, self.target_imsize, fdf_resize=True)
|
90 |
-
area = (self.boxes[:, 2] - self.boxes[:, 0]) * (self.boxes[:, 3] - self.boxes[:, 1]).view(-1)
|
91 |
-
|
92 |
-
# Find the square mask corresponding to box.
|
93 |
-
box_mask = box.copy().astype(float)
|
94 |
-
box_mask[[0, 2]] -= expanded_boxes[0]
|
95 |
-
box_mask[[1, 3]] -= expanded_boxes[1]
|
96 |
-
|
97 |
-
width = expanded_boxes[2] - expanded_boxes[0]
|
98 |
-
resize_factor = self.target_imsize[0] / width
|
99 |
-
box_mask = (box_mask * resize_factor).astype(int)
|
100 |
-
mask = torch.ones((1, *self.target_imsize), device=im.device, dtype=torch.float32)
|
101 |
-
crop_box(mask, box_mask).fill_(0)
|
102 |
-
return dict(
|
103 |
-
img=im[None], mask=mask[None],
|
104 |
-
boxes=torch.from_numpy(expanded_boxes).view(1, -1))
|
105 |
-
|
106 |
-
def __len__(self):
|
107 |
-
return len(self.boxes)
|
108 |
-
|
109 |
-
@staticmethod
|
110 |
-
def from_state_dict(state_dict, **kwargs):
|
111 |
-
return FaceDetection(state_dict["boxes"].cpu(), **kwargs)
|
112 |
-
|
113 |
-
def state_dict(self, **kwargs):
|
114 |
-
return dict(boxes=self.boxes, cls=self.__class__)
|
115 |
-
|
116 |
-
def pre_process(self):
|
117 |
-
pass
|
118 |
-
|
119 |
-
|
120 |
-
def remove_dilate_in_pad(mask: torch.Tensor, exp_box, orig_imshape):
|
121 |
-
"""
|
122 |
-
Dilation happens after padding, which could place dilation in the padded area.
|
123 |
-
Remove this.
|
124 |
-
"""
|
125 |
-
x0, y0, x1, y1 = exp_box
|
126 |
-
H, W = orig_imshape
|
127 |
-
# Padding in original image space
|
128 |
-
p_y0 = max(0, -y0)
|
129 |
-
p_y1 = max(y1 - H, 0)
|
130 |
-
p_x0 = max(0, -x0)
|
131 |
-
p_x1 = max(x1 - W, 0)
|
132 |
-
resize_ratio = mask.shape[-2] / (y1-y0)
|
133 |
-
p_x0, p_y0, p_x1, p_y1 = [(_*resize_ratio).floor().long() for _ in [p_x0, p_y0, p_x1, p_y1]]
|
134 |
-
mask[..., :p_y0, :] = 0
|
135 |
-
mask[..., :p_x0] = 0
|
136 |
-
mask[..., mask.shape[-2] - p_y1:, :] = 0
|
137 |
-
mask[..., mask.shape[-1] - p_x1:] = 0
|
138 |
-
|
139 |
-
|
140 |
-
class CSEPersonDetection:
|
141 |
-
|
142 |
-
def __init__(self,
|
143 |
-
segmentation, cse_dets,
|
144 |
-
target_imsize,
|
145 |
-
exp_bbox_cfg, exp_bbox_filter,
|
146 |
-
dilation_percentage: float,
|
147 |
-
embed_map: torch.Tensor,
|
148 |
-
orig_imshape_CHW,
|
149 |
-
normalize_embedding: bool) -> None:
|
150 |
-
self.segmentation = segmentation
|
151 |
-
self.cse_dets = cse_dets
|
152 |
-
self.target_imsize = list(target_imsize)
|
153 |
-
self.pre_processed = False
|
154 |
-
self.exp_bbox_cfg = exp_bbox_cfg
|
155 |
-
self.exp_bbox_filter = exp_bbox_filter
|
156 |
-
self.dilation_percentage = dilation_percentage
|
157 |
-
self.embed_map = embed_map
|
158 |
-
self.normalize_embedding = normalize_embedding
|
159 |
-
if self.normalize_embedding:
|
160 |
-
embed_map_mean = self.embed_map.mean(dim=0, keepdim=True)
|
161 |
-
embed_map_rstd = ((self.embed_map - embed_map_mean).square().mean(dim=0, keepdim=True)+1e-8).rsqrt()
|
162 |
-
self.embed_map_normalized = (self.embed_map - embed_map_mean) * embed_map_rstd
|
163 |
-
self.orig_imshape_CHW = orig_imshape_CHW
|
164 |
-
|
165 |
-
@torch.no_grad()
|
166 |
-
def pre_process(self):
|
167 |
-
if self.pre_processed:
|
168 |
-
return
|
169 |
-
boxes = initialize_cse_boxes(self.segmentation, self.cse_dets["bbox_XYXY"]).cpu()
|
170 |
-
expanded_boxes = []
|
171 |
-
included_boxes = []
|
172 |
-
for i in range(len(boxes)):
|
173 |
-
exp_box = get_expanded_bbox(
|
174 |
-
boxes[i], self.orig_imshape_CHW[1:], self.segmentation[i], **self.exp_bbox_cfg,
|
175 |
-
target_aspect_ratio=self.target_imsize[0]/self.target_imsize[1])
|
176 |
-
if not include_box(exp_box, imshape=self.orig_imshape_CHW[1:], **self.exp_bbox_filter):
|
177 |
-
continue
|
178 |
-
included_boxes.append(i)
|
179 |
-
expanded_boxes.append(exp_box)
|
180 |
-
expanded_boxes = torch.LongTensor(expanded_boxes).view(-1, 4)
|
181 |
-
self.segmentation = self.segmentation[included_boxes]
|
182 |
-
self.cse_dets = {k: v[included_boxes] for k, v in self.cse_dets.items()}
|
183 |
-
|
184 |
-
self.mask = torch.empty((len(expanded_boxes), *self.target_imsize), device=tops.get_device(), dtype=torch.bool)
|
185 |
-
area = self.segmentation.sum(dim=[1, 2]).view(len(expanded_boxes))
|
186 |
-
for i, box in enumerate(expanded_boxes):
|
187 |
-
self.mask[i] = cut_pad_resize(self.segmentation[i:i+1], box, self.target_imsize)[0]
|
188 |
-
|
189 |
-
dilation_kernel = get_kernel(int((self.target_imsize[0]*self.target_imsize[1])**0.5*self.dilation_percentage))
|
190 |
-
self.maskrcnn_mask = self.mask.clone().logical_not()[:, None]
|
191 |
-
self.mask = utils.binary_dilation(self.mask[:, None], dilation_kernel)
|
192 |
-
[remove_dilate_in_pad(self.mask[i], expanded_boxes[i], self.orig_imshape_CHW[1:]) for i in range(len(expanded_boxes))]
|
193 |
-
self.boxes = expanded_boxes.cpu()
|
194 |
-
self.dilated_boxes = get_dilated_boxes(self.boxes, self.mask)
|
195 |
-
|
196 |
-
self.pre_processed = True
|
197 |
-
self.n_detections = len(self.boxes)
|
198 |
-
self.mask = self.mask.logical_not()
|
199 |
-
|
200 |
-
E_mask = torch.zeros((self.n_detections, 1, *self.target_imsize), device=self.mask.device, dtype=torch.bool)
|
201 |
-
self.vertices = torch.zeros_like(E_mask, dtype=torch.long)
|
202 |
-
for i in range(self.n_detections):
|
203 |
-
E_, E_mask[i] = transform_embedding(
|
204 |
-
self.cse_dets["instance_embedding"][i],
|
205 |
-
self.cse_dets["instance_segmentation"][i],
|
206 |
-
self.boxes[i],
|
207 |
-
self.cse_dets["bbox_XYXY"][i].cpu(),
|
208 |
-
self.target_imsize
|
209 |
-
)
|
210 |
-
self.vertices[i] = utils.from_E_to_vertex(E_[None], E_mask[i:i+1].logical_not(), self.embed_map).squeeze()[None]
|
211 |
-
self.E_mask = E_mask
|
212 |
-
|
213 |
-
sorted_idx = torch.argsort(area, descending=False)
|
214 |
-
self.mask = self.mask[sorted_idx]
|
215 |
-
self.boxes = self.boxes[sorted_idx.cpu()]
|
216 |
-
self.vertices = self.vertices[sorted_idx]
|
217 |
-
self.E_mask = self.E_mask[sorted_idx]
|
218 |
-
self.maskrcnn_mask = self.maskrcnn_mask[sorted_idx]
|
219 |
-
|
220 |
-
def get_crop(self, idx: int, im):
|
221 |
-
self.pre_process()
|
222 |
-
assert idx < len(self)
|
223 |
-
box = self.boxes[idx]
|
224 |
-
mask = self.mask[idx]
|
225 |
-
im = cut_pad_resize(im, box, self.target_imsize).unsqueeze(0)
|
226 |
-
|
227 |
-
vertices_ = self.vertices[idx]
|
228 |
-
E_mask_ = self.E_mask[idx].float()
|
229 |
-
if self.normalize_embedding:
|
230 |
-
embedding = self.embed_map_normalized[vertices_.squeeze(dim=0)].permute(2, 0, 1) * E_mask_
|
231 |
-
else:
|
232 |
-
embedding = self.embed_map[vertices_.squeeze(dim=0)].permute(2, 0, 1) * E_mask_
|
233 |
-
|
234 |
-
return dict(
|
235 |
-
img=im,
|
236 |
-
mask=mask.float()[None],
|
237 |
-
boxes=box.reshape(1, -1),
|
238 |
-
E_mask=E_mask_[None],
|
239 |
-
vertices=vertices_[None],
|
240 |
-
embed_map=self.embed_map,
|
241 |
-
embedding=embedding[None],
|
242 |
-
maskrcnn_mask=self.maskrcnn_mask[idx].float()[None]
|
243 |
-
)
|
244 |
-
|
245 |
-
def __len__(self):
|
246 |
-
self.pre_process()
|
247 |
-
return self.n_detections
|
248 |
-
|
249 |
-
def state_dict(self, after_preprocess=False):
|
250 |
-
"""
|
251 |
-
The processed annotations occupy more space than the original detections.
|
252 |
-
"""
|
253 |
-
if not after_preprocess:
|
254 |
-
return {
|
255 |
-
"combined_segmentation": self.segmentation.bool(),
|
256 |
-
"cse_instance_segmentation": self.cse_dets["instance_segmentation"].bool(),
|
257 |
-
"cse_instance_embedding": self.cse_dets["instance_embedding"],
|
258 |
-
"cse_bbox_XYXY": self.cse_dets["bbox_XYXY"].long(),
|
259 |
-
"cls": self.__class__,
|
260 |
-
"orig_imshape_CHW": self.orig_imshape_CHW
|
261 |
-
}
|
262 |
-
self.pre_process()
|
263 |
-
return dict(
|
264 |
-
E_mask=torch.from_numpy(np.packbits(self.E_mask.bool().cpu().numpy())),
|
265 |
-
-            mask=torch.from_numpy(np.packbits(self.mask.bool().cpu().numpy())),
-            maskrcnn_mask=torch.from_numpy(np.packbits(self.maskrcnn_mask.bool().cpu().numpy())),
-            vertices=self.vertices.to(torch.int16).cpu(),
-            cls=self.__class__,
-            boxes=self.boxes,
-            orig_imshape_CHW=self.orig_imshape_CHW,
-        )
-
-    @staticmethod
-    def from_state_dict(
-            state_dict, embed_map,
-            post_process_cfg, **kwargs):
-        after_preprocess = "segmentation" not in state_dict and "combined_segmentation" not in state_dict
-        if after_preprocess:
-            detection = CSEPersonDetection(
-                segmentation=None, cse_dets=None, embed_map=embed_map,
-                orig_imshape_CHW=state_dict["orig_imshape_CHW"],
-                **post_process_cfg)
-            detection.vertices = tops.to_cuda(state_dict["vertices"].long())
-            numel = np.prod(detection.vertices.shape)
-            detection.E_mask = tops.to_cuda(torch.from_numpy(np.unpackbits(state_dict["E_mask"].numpy(), count=numel))).view(*detection.vertices.shape)
-            detection.mask = tops.to_cuda(torch.from_numpy(np.unpackbits(state_dict["mask"].numpy(), count=numel))).view(*detection.vertices.shape)
-            detection.maskrcnn_mask = tops.to_cuda(torch.from_numpy(np.unpackbits(state_dict["maskrcnn_mask"].numpy(), count=numel))).view(*detection.vertices.shape)
-            detection.n_detections = len(detection.mask)
-            detection.pre_processed = True
-
-            if isinstance(state_dict["boxes"], np.ndarray):
-                state_dict["boxes"] = torch.from_numpy(state_dict["boxes"])
-            detection.boxes = state_dict["boxes"]
-            return detection
-
-        cse_dets = dict(
-            instance_segmentation=state_dict["cse_instance_segmentation"],
-            instance_embedding=state_dict["cse_instance_embedding"],
-            embed_map=embed_map,
-            bbox_XYXY=state_dict["cse_bbox_XYXY"])
-        cse_dets = {k: tops.to_cuda(v) for k, v in cse_dets.items()}
-
-        segmentation = state_dict["combined_segmentation"]
-        return CSEPersonDetection(
-            segmentation, cse_dets, embed_map=embed_map,
-            orig_imshape_CHW=state_dict["orig_imshape_CHW"],
-            **post_process_cfg)
-
-    def visualize(self, im):
-        self.pre_process()
-        if len(self) == 0:
-            return im
-        im = vis_utils.draw_cropped_masks(
-            im.clone(), self.mask, self.boxes, visualize_instances=False)
-        E = self.embed_map[self.vertices.long()].squeeze(1).permute(0, 3, 1, 2)
-        im = im.to(E.device)
-        im = vis_utils.draw_cse_all(
-            E, self.E_mask.squeeze(1).bool(), im,
-            self.boxes, self.embed_map)
-        im = torchvision.utils.draw_bounding_boxes(im.cpu(), self.boxes, colors=(255, 0, 0), width=2)
-        return im
-
-
-def shift_and_preprocess_keypoints(keypoints: torch.Tensor, boxes):
-    keypoints = keypoints.clone()
-    N = boxes.shape[0]
-    tops.assert_shape(keypoints, (N, None, 3))
-    tops.assert_shape(boxes, (N, 4))
-    x0, y0, x1, y1 = [_.view(-1, 1) for _ in boxes.T]
-
-    w = x1 - x0
-    h = y1 - y0
-    keypoints[:, :, 0] = (keypoints[:, :, 0] - x0) / w
-    keypoints[:, :, 1] = (keypoints[:, :, 1] - y0) / h
-    check_outside = lambda x: (x < 0).logical_or(x > 1)
-    is_outside = check_outside(keypoints[:, :, 0]).logical_or(check_outside(keypoints[:, :, 1]))
-    keypoints[:, :, 2] = keypoints[:, :, 2] >= 0
-    keypoints[:, :, 2] = (keypoints[:, :, 2] > 0).logical_and(is_outside.logical_not())
-    return keypoints
-
-
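Note on the removed shift_and_preprocess_keypoints helper above: it maps keypoint x/y coordinates into the [0, 1] range of their expanded box and clears the visibility flag for points that fall outside it. Below is a minimal standalone sketch of that normalization with hypothetical values (plain PyTorch, without the dp2/tops helpers; the extra handling of negative visibility sentinels is omitted).

import torch

# One detection box (x0, y0, x1, y1) and two keypoints (x, y, visibility).
boxes = torch.tensor([[10., 20., 110., 220.]])                   # a 100 x 200 box
keypoints = torch.tensor([[[60., 120., 1.], [200., 30., 1.]]])   # second point lies outside the box

x0, y0, x1, y1 = [c.view(-1, 1) for c in boxes.T]
w, h = x1 - x0, y1 - y0

kp = keypoints.clone()
kp[:, :, 0] = (kp[:, :, 0] - x0) / w          # x -> box-relative [0, 1]
kp[:, :, 1] = (kp[:, :, 1] - y0) / h          # y -> box-relative [0, 1]
outside = ((kp[:, :, 0] < 0) | (kp[:, :, 0] > 1)) | ((kp[:, :, 1] < 0) | (kp[:, :, 1] > 1))
kp[:, :, 2] = (kp[:, :, 2] > 0) & ~outside    # visibility is dropped for out-of-box points
print(kp)
# tensor([[[0.5000, 0.5000, 1.0000],
#          [1.9000, 0.0500, 0.0000]]])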
-class PersonDetection:
-
-    def __init__(
-            self,
-            segmentation,
-            target_imsize,
-            exp_bbox_cfg, exp_bbox_filter,
-            dilation_percentage: float,
-            orig_imshape_CHW,
-            keypoints=None,
-            **kwargs) -> None:
-        self.segmentation = segmentation
-        self.target_imsize = list(target_imsize)
-        self.pre_processed = False
-        self.exp_bbox_cfg = exp_bbox_cfg
-        self.exp_bbox_filter = exp_bbox_filter
-        self.dilation_percentage = dilation_percentage
-        self.orig_imshape_CHW = orig_imshape_CHW
-        self.keypoints = keypoints
-
-    @torch.no_grad()
-    def pre_process(self):
-        if self.pre_processed:
-            return
-        boxes = masks_to_boxes(self.segmentation).cpu()
-        expanded_boxes = []
-        included_boxes = []
-        for i in range(len(boxes)):
-            exp_box = get_expanded_bbox(
-                boxes[i], self.orig_imshape_CHW[1:], self.segmentation[i], **self.exp_bbox_cfg,
-                target_aspect_ratio=self.target_imsize[0]/self.target_imsize[1])
-            if not include_box(exp_box, imshape=self.orig_imshape_CHW[1:], **self.exp_bbox_filter):
-                continue
-            included_boxes.append(i)
-            expanded_boxes.append(exp_box)
-        expanded_boxes = torch.LongTensor(expanded_boxes).view(-1, 4)
-        self.segmentation = self.segmentation[included_boxes]
-        if self.keypoints is not None:
-            self.keypoints = self.keypoints[included_boxes]
-        area = self.segmentation.sum(dim=[1, 2]).view(len(expanded_boxes))
-        self.mask = torch.empty((len(expanded_boxes), *self.target_imsize), device=tops.get_device(), dtype=torch.bool)
-        for i, box in enumerate(expanded_boxes):
-            self.mask[i] = cut_pad_resize(self.segmentation[i:i+1], box, self.target_imsize)[0]
-        if self.keypoints is not None:
-            self.keypoints = shift_and_preprocess_keypoints(self.keypoints, expanded_boxes)
-        dilation_kernel = get_kernel(int((self.target_imsize[0]*self.target_imsize[1])**0.5*self.dilation_percentage))
-        self.maskrcnn_mask = self.mask.clone().logical_not()[:, None]
-        self.mask = utils.binary_dilation(self.mask[:, None], dilation_kernel)
-
-        [remove_dilate_in_pad(self.mask[i], expanded_boxes[i], self.orig_imshape_CHW[1:]) for i in range(len(expanded_boxes))]
-        self.boxes = expanded_boxes
-        self.dilated_boxes = get_dilated_boxes(self.boxes, self.mask)
-
-        self.pre_processed = True
-        self.n_detections = len(self.boxes)
-        self.mask = self.mask.logical_not()
-
-        sorted_idx = torch.argsort(area, descending=False)
-        self.mask = self.mask[sorted_idx]
-        self.boxes = self.boxes[sorted_idx.cpu()]
-        self.segmentation = self.segmentation[sorted_idx]
-        self.maskrcnn_mask = self.maskrcnn_mask[sorted_idx]
-        if self.keypoints is not None:
-            self.keypoints = self.keypoints[sorted_idx]
-
-    def get_crop(self, idx: int, im: torch.Tensor):
-        assert idx < len(self)
-        self.pre_process()
-        box = self.boxes[idx]
-        mask = self.mask[idx][None].float()
-        im = cut_pad_resize(im, box, self.target_imsize).unsqueeze(0)
-        batch = dict(
-            img=im, mask=mask, boxes=box.reshape(1, -1),
-            maskrcnn_mask=self.maskrcnn_mask[idx][None].float())
-        if self.keypoints is not None:
-            batch["keypoints"] = self.keypoints[idx:idx+1]
-        return batch
-
-    def __len__(self):
-        self.pre_process()
-        return self.n_detections
-
-    def state_dict(self, **kwargs):
-        return dict(
-            segmentation=self.segmentation.bool(),
-            cls=self.__class__,
-            orig_imshape_CHW=self.orig_imshape_CHW,
-            keypoints=self.keypoints
-        )
-
-    @staticmethod
-    def from_state_dict(
-            state_dict,
-            post_process_cfg, **kwargs):
-        return PersonDetection(
-            state_dict["segmentation"],
-            orig_imshape_CHW=state_dict["orig_imshape_CHW"],
-            **post_process_cfg,
-            keypoints=state_dict["keypoints"])
-
-    def visualize(self, im):
-        self.pre_process()
-        im = im.cpu()
-        if len(self) == 0:
-            return im
-        im = vis_utils.draw_cropped_masks(im.clone(), self.mask, self.boxes, visualize_instances=False)
-        im = vis_utils.draw_cropped_keypoints(im, self.keypoints, self.boxes)
-        return im
-
-
-def get_dilated_boxes(exp_bbox: torch.LongTensor, mask):
-    """
-    mask: resized mask
-    """
-    assert exp_bbox.shape[0] == mask.shape[0]
-    boxes = masks_to_boxes(mask.squeeze(1)).cpu()
-    H, W = exp_bbox[:, 3] - exp_bbox[:, 1], exp_bbox[:, 2] - exp_bbox[:, 0]
-    boxes[:, [0, 2]] = (boxes[:, [0, 2]] * W[:, None] / mask.shape[-1]).long()
-    boxes[:, [1, 3]] = (boxes[:, [1, 3]] * H[:, None] / mask.shape[-2]).long()
-    boxes[:, [0, 2]] += exp_bbox[:, 0:1]
-    boxes[:, [1, 3]] += exp_bbox[:, 1:2]
-    return boxes
-
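Similarly, the removed get_dilated_boxes helper maps a box measured on the resized mask grid back to original image coordinates: rescale by the expanded crop size, then shift by the crop origin. A small sketch with hypothetical numbers (plain PyTorch, not the dp2 implementation):

import torch

# Hypothetical numbers: an expanded crop box in image coordinates and a box
# found on the resized (128 x 128) mask grid for that crop.
exp_bbox = torch.tensor([[40, 60, 296, 572]])    # x0, y0, x1, y1 -> 256 x 512 crop
mask_hw = (128, 128)                             # (H, W) of the resized mask
box_in_mask = torch.tensor([[32, 16, 96, 112]])  # tight box around the dilated mask

H = exp_bbox[:, 3] - exp_bbox[:, 1]              # crop height (512)
W = exp_bbox[:, 2] - exp_bbox[:, 0]              # crop width (256)

box = box_in_mask.clone()
box[:, [0, 2]] = box[:, [0, 2]] * W[:, None] // mask_hw[1]  # rescale x to crop size
box[:, [1, 3]] = box[:, [1, 3]] * H[:, None] // mask_hw[0]  # rescale y to crop size
box[:, [0, 2]] += exp_bbox[:, 0:1]               # shift back to image coordinates
box[:, [1, 3]] += exp_bbox[:, 1:2]
print(box)  # tensor([[104, 124, 232, 508]])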