added code
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitignore +153 -0
- config/diffsketchedit.yaml +75 -0
- docs/figures/refine/ldm_generated_image0.png +3 -0
- docs/figures/refine/ldm_generated_image1.png +3 -0
- docs/figures/refine/ldm_generated_image2.png +3 -0
- docs/figures/refine/visual_best-rendered0.png +3 -0
- docs/figures/refine/visual_best-rendered1.png +3 -0
- docs/figures/refine/visual_best-rendered2.png +3 -0
- docs/figures/replace/ldm_generated_image0.png +3 -0
- docs/figures/replace/ldm_generated_image1.png +3 -0
- docs/figures/replace/ldm_generated_image2.png +3 -0
- docs/figures/replace/ldm_generated_image3.png +3 -0
- docs/figures/replace/visual_best-rendered0.png +3 -0
- docs/figures/replace/visual_best-rendered1.png +3 -0
- docs/figures/replace/visual_best-rendered2.png +3 -0
- docs/figures/replace/visual_best-rendered3.png +3 -0
- docs/figures/reweight/ldm_generated_image0.png +3 -0
- docs/figures/reweight/ldm_generated_image1.png +3 -0
- docs/figures/reweight/ldm_generated_image2.png +3 -0
- docs/figures/reweight/visual_best-rendered0.png +3 -0
- docs/figures/reweight/visual_best-rendered1.png +3 -0
- docs/figures/reweight/visual_best-rendered2.png +3 -0
- libs/__init__.py +9 -0
- libs/engine/__init__.py +7 -0
- libs/engine/config_processor.py +151 -0
- libs/engine/model_state.py +335 -0
- libs/metric/__init__.py +1 -0
- libs/metric/accuracy.py +25 -0
- libs/metric/clip_score/__init__.py +3 -0
- libs/metric/clip_score/openaiCLIP_loss.py +304 -0
- libs/metric/lpips_origin/__init__.py +3 -0
- libs/metric/lpips_origin/lpips.py +184 -0
- libs/metric/lpips_origin/pretrained_networks.py +196 -0
- libs/metric/lpips_origin/weights/v0.1/alex.pth +0 -0
- libs/metric/lpips_origin/weights/v0.1/squeeze.pth +0 -0
- libs/metric/lpips_origin/weights/v0.1/vgg.pth +0 -0
- libs/metric/piq/__init__.py +2 -0
- libs/metric/piq/functional/__init__.py +15 -0
- libs/metric/piq/functional/base.py +111 -0
- libs/metric/piq/functional/colour_conversion.py +136 -0
- libs/metric/piq/functional/filters.py +111 -0
- libs/metric/piq/functional/layers.py +33 -0
- libs/metric/piq/functional/resize.py +426 -0
- libs/metric/piq/perceptual.py +496 -0
- libs/metric/piq/utils/__init__.py +7 -0
- libs/metric/piq/utils/common.py +158 -0
- libs/metric/pytorch_fid/__init__.py +54 -0
- libs/metric/pytorch_fid/fid_score.py +322 -0
- libs/metric/pytorch_fid/inception.py +341 -0
- libs/modules/__init__.py +1 -0
.gitignore
ADDED
@@ -0,0 +1,153 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# .idea
.idea/
/idea/
*.ipr
*.iml
*.iws

# system
.DS_Store

# pytorch-lighting logs
lightning_logs/*

# Edit settings
.editorconfig

# local results
/workdir/
.workdir/

# dataset
/dataset/
!/dataset/placeholder.md
config/diffsketchedit.yaml
ADDED
@@ -0,0 +1,75 @@
seed: 1
image_size: 224
mask_object: False # if the target image contains background, it's better to mask it out
fix_scale: False # if the target image is not squared, it is recommended to fix the scale

# train
num_iter: 1000
batch_size: 1
num_stages: 1 # training stages, you can train x strokes, then freeze them and train another x strokes etc
lr_scheduler: False
lr_decay_rate: 0.1
decay_steps: [ 1000, 1500 ]
lr: 1
color_lr: 0.01
pruning_freq: 50
color_vars_threshold: 0.1
width_lr: 0.1
max_width: 50 # stroke width

# stroke attrs
num_paths: 96 # number of strokes
width: 1.0 # stroke width
control_points_per_seg: 4
num_segments: 1
optim_opacity: True # if True, the stroke opacity is optimized
optim_width: False # if True, the stroke width is optimized
optim_rgba: False # if True, the stroke RGBA is optimized
opacity_delta: 0 # stroke pruning

# init strokes
attention_init: True # if True, use the attention heads of Dino model to set the location of the initial strokes
xdog_intersec: True # initialize along the edge, mix XDoG and attn up
softmax_temp: 0.5
cross_attn_res: 16
self_attn_res: 32
max_com: 20 # select the number of the self-attn maps
mean_comp: False # the average of the self-attn maps
comp_idx: 0 # if mean_comp==False, indicates the index of the self-attn map
attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
log_cross_attn: False # True if cross attn every step
u2net_path: "./checkpoint/u2net/u2net.pth"

# ldm
model_id: "sd14"
ldm_speed_up: False
enable_xformers: False
gradient_checkpoint: False
#token_ind: 1 # the index of CLIP prompt embedding, start from 1
use_ddim: True
num_inference_steps: 50
guidance_scale: 7.5 # sdxl default 5.0
# ASDS loss
sds:
  crop_size: 512
  augmentations: "affine"
  guidance_scale: 100
  grad_scale: 1e-5
  t_range: [ 0.05, 0.95 ]
  warmup: 0

clip:
  model_name: "RN101" # RN101, ViT-L/14
  feats_loss_type: "l2" # clip visual loss type, conv layers
  feats_loss_weights: [ 0,0,1.0,1.0,0 ] # RN based
  # feats_loss_weights: [ 0,0,1.0,1.0,0,0,0,0,0,0,0,0 ] # ViT based
  fc_loss_weight: 0.1 # clip visual loss, fc layer weight
  augmentations: "affine" # augmentation before clip visual computation
  num_aug: 4 # num of augmentation before clip visual computation
  vis_loss: 1 # 1 or 0 for use or disable clip visual loss
  text_visual_coeff: 0 # cosine similarity between text and img

perceptual:
  name: "lpips" # dists
  lpips_net: 'vgg'
  coeff: 0.2
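The config above is consumed through OmegaConf in libs/engine/config_processor.py, added later in this diff. As a minimal, illustrative sketch (the override values are arbitrary examples, not recommended settings), loading this file and merging a dotlist override looks like:

# Illustrative only: load the YAML and override fields from a dotlist,
# mirroring what libs/engine/config_processor.py does further down.
from omegaconf import OmegaConf

cfg = OmegaConf.load("config/diffsketchedit.yaml")
override = OmegaConf.from_dotlist(["num_paths=128", "sds.guidance_scale=50"])
cfg = OmegaConf.merge(cfg, override)          # later values win
print(cfg.num_paths, cfg.sds.guidance_scale)  # 128 50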
docs/figures/refine/ldm_generated_image0.png
ADDED (image stored with Git LFS)
docs/figures/refine/ldm_generated_image1.png
ADDED (image stored with Git LFS)
docs/figures/refine/ldm_generated_image2.png
ADDED (image stored with Git LFS)
docs/figures/refine/visual_best-rendered0.png
ADDED (image stored with Git LFS)
docs/figures/refine/visual_best-rendered1.png
ADDED (image stored with Git LFS)
docs/figures/refine/visual_best-rendered2.png
ADDED (image stored with Git LFS)
docs/figures/replace/ldm_generated_image0.png
ADDED (image stored with Git LFS)
docs/figures/replace/ldm_generated_image1.png
ADDED (image stored with Git LFS)
docs/figures/replace/ldm_generated_image2.png
ADDED (image stored with Git LFS)
docs/figures/replace/ldm_generated_image3.png
ADDED (image stored with Git LFS)
docs/figures/replace/visual_best-rendered0.png
ADDED (image stored with Git LFS)
docs/figures/replace/visual_best-rendered1.png
ADDED (image stored with Git LFS)
docs/figures/replace/visual_best-rendered2.png
ADDED (image stored with Git LFS)
docs/figures/replace/visual_best-rendered3.png
ADDED (image stored with Git LFS)
docs/figures/reweight/ldm_generated_image0.png
ADDED (image stored with Git LFS)
docs/figures/reweight/ldm_generated_image1.png
ADDED (image stored with Git LFS)
docs/figures/reweight/ldm_generated_image2.png
ADDED (image stored with Git LFS)
docs/figures/reweight/visual_best-rendered0.png
ADDED (image stored with Git LFS)
docs/figures/reweight/visual_best-rendered1.png
ADDED (image stored with Git LFS)
docs/figures/reweight/visual_best-rendered2.png
ADDED (image stored with Git LFS)
libs/__init__.py
ADDED
@@ -0,0 +1,9 @@
from .utils import lazy

__getattr__, __dir__, __all__ = lazy.attach(
    __name__,
    submodules={'engine', 'metric', 'modules', 'solver', 'utils'},
    submod_attrs={}
)

__version__ = '0.0.1'
libs/engine/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .model_state import ModelState
from .config_processor import merge_and_update_config

__all__ = [
    'ModelState',
    'merge_and_update_config'
]
libs/engine/config_processor.py
ADDED
@@ -0,0 +1,151 @@
import os
from typing import Tuple
from functools import reduce

from argparse import Namespace
from omegaconf import DictConfig, OmegaConf


#################################################################################
#                           merge yaml and argparse                             #
#################################################################################

def register_resolver():
    OmegaConf.register_new_resolver(
        "add", lambda *numbers: sum(numbers)
    )
    OmegaConf.register_new_resolver(
        "multiply", lambda *numbers: reduce(lambda x, y: x * y, numbers)
    )
    OmegaConf.register_new_resolver(
        "sub", lambda n1, n2: n1 - n2
    )


def _merge_args_and_config(
        cmd_args: Namespace,
        yaml_config: DictConfig,
        read_only: bool = False
) -> Tuple[DictConfig, DictConfig, DictConfig]:
    # convert cmd line args to OmegaConf
    cmd_args_dict = vars(cmd_args)
    cmd_args_list = []
    for k, v in cmd_args_dict.items():
        cmd_args_list.append(f"{k}={v}")
    cmd_args_conf = OmegaConf.from_cli(cmd_args_list)

    # The following overrides the previous configuration
    # cmd_args_list > configs
    args_ = OmegaConf.merge(yaml_config, cmd_args_conf)

    if read_only:
        OmegaConf.set_readonly(args_, True)

    return args_, cmd_args_conf, yaml_config


def merge_configs(args, method_cfg_path):
    """merge command line args (argparse) and config file (OmegaConf)"""
    yaml_config_path = os.path.join("./", "config", method_cfg_path)
    try:
        yaml_config = OmegaConf.load(yaml_config_path)
    except FileNotFoundError as e:
        print(f"error: {e}")
        print(f"input file path: `{method_cfg_path}`")
        print(f"config path: `{yaml_config_path}` not found.")
        raise FileNotFoundError(e)
    return _merge_args_and_config(args, yaml_config, read_only=False)


def update_configs(source_args, update_nodes, strict=True, remove_update_nodes=True):
    """update config file (OmegaConf) with dotlist"""
    if update_nodes is None:
        return source_args

    update_args_list = str(update_nodes).split()
    if len(update_args_list) < 1:
        return source_args

    # check update_args
    for item in update_args_list:
        item_key_ = str(item).split('=')[0]  # get key
        # item_val_ = str(item).split('=')[1]  # get value

        if strict:
            # Tests if a key is existing
            # assert OmegaConf.select(source_args, item_key_) is not None, f"{item_key_} is not existing."

            # Tests if a value is missing
            assert not OmegaConf.is_missing(source_args, item_key_), f"the value of {item_key_} is missing."

        # if keys is None, then add key and set the value
        if OmegaConf.select(source_args, item_key_) is None:
            source_args.item_key_ = item_key_

    # update original yaml params
    update_nodes = OmegaConf.from_dotlist(update_args_list)
    merged_args = OmegaConf.merge(source_args, update_nodes)

    # remove update_args
    if remove_update_nodes:
        OmegaConf.update(merged_args, 'update', '')
    return merged_args


def update_if_exist(source_args, update_nodes):
    """update config file (OmegaConf) with dotlist"""
    if update_nodes is None:
        return source_args

    upd_args_list = str(update_nodes).split()
    if len(upd_args_list) < 1:
        return source_args

    update_args_list = []
    for item in upd_args_list:
        item_key_ = str(item).split('=')[0]  # get key

        # if a key is existing
        # if OmegaConf.select(source_args, item_key_) is not None:
        #     update_args_list.append(item)

        update_args_list.append(item)

    # update source_args if key be selected
    if len(update_args_list) < 1:
        merged_args = source_args
    else:
        update_nodes = OmegaConf.from_dotlist(update_args_list)
        merged_args = OmegaConf.merge(source_args, update_nodes)

    return merged_args


def merge_and_update_config(args):
    register_resolver()

    # if yaml_config is existing, then merge command line args and yaml_config
    # if os.path.isfile(args.config) and args.config is not None:
    if args.config is not None and str(args.config).endswith('.yaml'):
        merged_args, cmd_args, yaml_config = merge_configs(args, args.config)
    else:
        merged_args, cmd_args, yaml_config = args, args, None

    # update the yaml_config with the cmd '-update' flag
    update_nodes = args.update
    final_args = update_configs(merged_args, update_nodes)

    # to simplify log output, we empty this
    yaml_config_update = update_if_exist(yaml_config, update_nodes)
    cmd_args_update = update_if_exist(cmd_args, update_nodes)
    cmd_args_update.update = ""  # clear update params

    final_args.yaml_config = yaml_config_update
    final_args.cmd_args = cmd_args_update

    # update seed
    if final_args.seed < 0:
        import random
        final_args.seed = random.randint(0, 65535)

    return final_args
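A hedged usage sketch for merge_and_update_config. The driver script and its flags below are hypothetical (not part of this commit), but the function only needs an argparse Namespace carrying at least `config`, `update`, and `seed`:

# Hypothetical driver script: CLI args are merged over config/diffsketchedit.yaml,
# then an optional space-separated dotlist in -update is applied on top.
import argparse
from libs.engine import merge_and_update_config

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", default="diffsketchedit.yaml")  # resolved under ./config/
parser.add_argument("-update", type=str, default=None,
                    help="space-separated dotlist, e.g. 'num_paths=128 lr=0.5'")
parser.add_argument("--seed", type=int, default=-1)  # <0 draws a random seed
args = parser.parse_args()

cfg = merge_and_update_config(args)
print(cfg.seed, cfg.num_paths)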
libs/engine/model_state.py
ADDED
@@ -0,0 +1,335 @@
import os
from functools import partial
from typing import Union, List
from pathlib import Path
from datetime import datetime, timedelta

from omegaconf import DictConfig
from pprint import pprint
import torch
from accelerate.utils import LoggerType
from accelerate import (
    Accelerator,
    GradScalerKwargs,
    DistributedDataParallelKwargs,
    InitProcessGroupKwargs
)

from ..modules.ema import EMA
from ..utils.logging import get_logger


class ModelState:
    """
    Handling logger and `hugging face` accelerate training

    features:
        - Mixed Precision
        - Gradient Scaler
        - Gradient Accumulation
        - Optimizer
        - EMA
        - Logger (default: python print)
        - Monitor (default: wandb, tensorboard)
    """

    def __init__(
            self,
            args,
            log_path_suffix: str = None,
            ignore_log=False,  # whether to create log file or not
    ) -> None:
        self.args: DictConfig = args

        """check valid"""
        mixed_precision = self.args.get("mixed_precision")
        # Bug: omegaconf convert 'no' to false
        mixed_precision = "no" if type(mixed_precision) == bool else mixed_precision
        split_batches = self.args.get("split_batches", False)
        gradient_accumulate_step = self.args.get("gradient_accumulate_step", 1)
        assert gradient_accumulate_step >= 1, f"except gradient_accumulate_step >= 1, get {gradient_accumulate_step}"

        """create working space"""
        # rule: ['./config'. 'method_name', 'exp_name.yaml']
        # -> results_path: ./runs/{method_name}-{exp_name}, as a base folder
        # config_prefix, config_name = str(self.args.get("config")).split('/')
        # config_name_only = str(config_name).split(".")[0]

        config_name_only = str(self.args.get("config")).split(".")[0]
        results_folder = self.args.get("results_path", None)
        if results_folder is None:
            # self.results_path = Path("./workdir") / f"{config_prefix}-{config_name_only}"
            self.results_path = Path("./workdir")
        else:
            # self.results_path = Path(results_folder) / f"{config_prefix}-{config_name_only}"
            self.results_path = Path(os.path.join(results_folder, self.args.get("edit_type"), ))

        # update results_path: ./runs/{method_name}-{exp_name}/{log_path_suffix}
        # noting: can be understood as "results dir / methods / ablation study / your result"
        if log_path_suffix is not None:
            self.results_path = self.results_path / log_path_suffix

        kwargs_handlers = []
        """mixed precision training"""
        if args.mixed_precision == "no":
            scaler_handler = GradScalerKwargs(
                init_scale=args.init_scale,
                growth_factor=args.growth_factor,
                backoff_factor=args.backoff_factor,
                growth_interval=args.growth_interval,
                enabled=True
            )
            kwargs_handlers.append(scaler_handler)

        """distributed training"""
        ddp_handler = DistributedDataParallelKwargs(
            dim=0,
            broadcast_buffers=True,
            static_graph=False,
            bucket_cap_mb=25,
            find_unused_parameters=False,
            check_reduction=False,
            gradient_as_bucket_view=False
        )
        kwargs_handlers.append(ddp_handler)

        init_handler = InitProcessGroupKwargs(timeout=timedelta(seconds=1200))
        kwargs_handlers.append(init_handler)

        """init visualized tracker"""
        log_with = []
        self.args.visual = False
        if args.use_wandb:
            log_with.append(LoggerType.WANDB)
        if args.tensorboard:
            log_with.append(LoggerType.TENSORBOARD)

        """hugging face Accelerator"""
        self.accelerator = Accelerator(
            device_placement=True,
            split_batches=split_batches,
            mixed_precision=mixed_precision,
            gradient_accumulation_steps=args.gradient_accumulate_step,
            cpu=True if args.use_cpu else False,
            log_with=None if len(log_with) == 0 else log_with,
            project_dir=self.results_path / "vis",
            kwargs_handlers=kwargs_handlers,
        )

        """logs"""
        if self.accelerator.is_local_main_process:
            # for logging results in a folder periodically
            self.results_path.mkdir(parents=True, exist_ok=True)
            if not ignore_log:
                now_time = datetime.now().strftime('%Y-%m-%d-%H-%M')
                # self.logger = get_logger(
                #     logs_dir=self.results_path.as_posix(),
                #     file_name=f"log.txt"
                # )

            print("==> command line args: ")
            print(args.cmd_args)
            print("==> yaml config args: ")
            print(args.yaml_config)

            print("\n***** Model State *****")
            if self.accelerator.distributed_type != "NO":
                print(f"-> Distributed Type: {self.accelerator.distributed_type}")
            print(f"-> Split Batch Size: {split_batches}, Total Batch Size: {self.actual_batch_size}")
            print(f"-> Mixed Precision: {mixed_precision}, AMP: {self.accelerator.native_amp},"
                  f" Gradient Accumulate Step: {gradient_accumulate_step}")
            print(f"-> Weight dtype: {self.weight_dtype}")

            if self.accelerator.scaler_handler is not None and self.accelerator.scaler_handler.enabled:
                print(f"-> Enabled GradScaler: {self.accelerator.scaler_handler.to_kwargs()}")

            if args.use_wandb:
                print(f"-> Init trackers: 'wandb' ")
                self.args.visual = True
                self.__init_tracker(project_name="my_project", tags=None, entity="")

            print(f"-> Working Space: '{self.results_path}'")

        """EMA"""
        self.use_ema = args.get('ema', False)
        self.ema_wrapper = self.__build_ema_wrapper()

        """glob step"""
        self.step = 0

        """log process"""
        self.accelerator.wait_for_everyone()
        print(f'Process {self.accelerator.process_index} using device: {self.accelerator.device}')

        self.print("-> state initialization complete \n")

    def __init_tracker(self, project_name, tags, entity):
        self.accelerator.init_trackers(
            project_name=project_name,
            config=dict(self.args),
            init_kwargs={
                "wandb": {
                    "notes": "accelerate trainer pipeline",
                    "tags": [
                        f"total batch_size: {self.actual_batch_size}"
                    ],
                    "entity": entity,
                }}
        )

    def __build_ema_wrapper(self):
        if self.use_ema:
            self.print(f"-> EMA: {self.use_ema}, decay: {self.args.ema_decay}, "
                       f"update_after_step: {self.args.ema_update_after_step}, "
                       f"update_every: {self.args.ema_update_every}")
            ema_wrapper = partial(
                EMA, beta=self.args.ema_decay,
                update_after_step=self.args.ema_update_after_step,
                update_every=self.args.ema_update_every
            )
        else:
            ema_wrapper = None

        return ema_wrapper

    @property
    def device(self):
        return self.accelerator.device

    @property
    def weight_dtype(self):
        weight_dtype = torch.float32
        if self.accelerator.mixed_precision == "fp16":
            weight_dtype = torch.float16
        elif self.accelerator.mixed_precision == "bf16":
            weight_dtype = torch.bfloat16
        return weight_dtype

    @property
    def actual_batch_size(self):
        if self.accelerator.split_batches is False:
            actual_batch_size = self.args.batch_size * self.accelerator.num_processes * self.accelerator.gradient_accumulation_steps
        else:
            assert self.actual_batch_size % self.accelerator.num_processes == 0
            actual_batch_size = self.args.batch_size
        return actual_batch_size

    @property
    def n_gpus(self):
        return self.accelerator.num_processes

    @property
    def no_decay_params_names(self):
        no_decay = [
            "bn", "LayerNorm", "GroupNorm",
        ]
        return no_decay

    def no_decay_params(self, model, weight_decay):
        """optimization tricks"""
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in self.no_decay_params_names)
                ],
                "weight_decay": weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in self.no_decay_params_names)
                ],
                "weight_decay": 0.0,
            },
        ]
        return optimizer_grouped_parameters

    def optimized_params(self, model: torch.nn.Module, verbose=True) -> List:
        """return parameters if `requires_grad` is True

        Args:
            model: pytorch models
            verbose: log optimized parameters

        Examples:
            >>> self.params_optimized = self.optimized_params(uvit, verbose=True)
            >>> optimizer = torch.optim.AdamW(self.params_optimized, lr=args.lr)

        Returns:
            a list of parameters
        """
        params_optimized = []
        for key, value in model.named_parameters():
            if value.requires_grad:
                params_optimized.append(value)
                if verbose:
                    self.print("\t {}, {}, {}".format(key, value.numel(), value.shape))
        return params_optimized

    def save_everything(self, fpath: str):
        """Saving and loading the model, optimizer, RNG generators, and the GradScaler."""
        if not self.accelerator.is_main_process:
            return
        self.accelerator.save_state(fpath)

    def load_save_everything(self, fpath: str):
        """Loading the model, optimizer, RNG generators, and the GradScaler."""
        self.accelerator.load_state(fpath)

    def save(self, milestone: Union[str, float, int], checkpoint: object) -> None:
        if not self.accelerator.is_main_process:
            return

        torch.save(checkpoint, self.results_path / f'model-{milestone}.pt')

    def save_in(self, root: Union[str, Path], checkpoint: object) -> None:
        if not self.accelerator.is_main_process:
            return

        torch.save(checkpoint, root)

    def load_ckpt_model_only(self, model: torch.nn.Module, path: Union[str, Path], rm_module_prefix: bool = False):
        ckpt = torch.load(path, map_location=self.accelerator.device)

        unwrapped_model = self.accelerator.unwrap_model(model)
        if rm_module_prefix:
            unwrapped_model.load_state_dict({k.replace('module.', ''): v for k, v in ckpt.items()})
        else:
            unwrapped_model.load_state_dict(ckpt)
        return unwrapped_model

    def load_shared_weights(self, model: torch.nn.Module, path: Union[str, Path]):
        ckpt = torch.load(path, map_location=self.accelerator.device)
        self.print(f"pretrained_dict len: {len(ckpt)}")
        unwrapped_model = self.accelerator.unwrap_model(model)
        model_dict = unwrapped_model.state_dict()
        pretrained_dict = {k: v for k, v in ckpt.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        unwrapped_model.load_state_dict(model_dict, strict=False)
        self.print(f"selected pretrained_dict: {len(model_dict)}")
        return unwrapped_model

    def print(self, *args, **kwargs):
        """Use in replacement of `print()` to only print once per server."""
        self.accelerator.print(*args, **kwargs)

    def pretty_print(self, msg):
        if self.accelerator.is_local_main_process:
            pprint(dict(msg))

    def close_tracker(self):
        self.accelerator.end_training()

    def free_memory(self):
        self.accelerator.clear()

    def close(self, msg: str = "Training complete."):
        """Use in end of training."""
        self.free_memory()

        if torch.cuda.is_available():
            self.print(f'\nGPU memory usage: {torch.cuda.max_memory_reserved() / 1024 ** 3:.2f} GB')
        if self.args.visual:
            self.close_tracker()
        self.print(msg)
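A rough, hypothetical sketch of how a pipeline might drive ModelState. It assumes `cfg` is the DictConfig produced by merge_and_update_config above and that it carries every field the constructor reads (mixed_precision, use_wandb, tensorboard, use_cpu, batch_size, gradient_accumulate_step, ema, edit_type, ...); none of the names below beyond ModelState's own API come from this commit:

# Hypothetical sketch: construct the state, prepare a toy model with Accelerate,
# run one optimization step, then checkpoint and shut down.
import torch
from libs.engine import ModelState, merge_and_update_config

cfg = merge_and_update_config(args)  # `args`: the argparse Namespace from the previous sketch
state = ModelState(cfg, log_path_suffix="demo")

model = torch.nn.Linear(16, 4)
optimizer = torch.optim.AdamW(state.optimized_params(model), lr=1e-3)
model, optimizer = state.accelerator.prepare(model, optimizer)

x = torch.randn(8, 16, device=state.device)
loss = model(x).pow(2).mean()
state.accelerator.backward(loss)   # respects mixed precision / grad accumulation
optimizer.step()

state.save("final", {"model": state.accelerator.unwrap_model(model).state_dict()})
state.close()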
libs/metric/__init__.py
ADDED
@@ -0,0 +1 @@
libs/metric/accuracy.py
ADDED
@@ -0,0 +1,25 @@
def accuracy(output, target, topk=(1,)):
    """
    Computes the accuracy over the k top predictions for the specified values of k.

    Args
        output: logits or probs (num of batch, num of classes)
        target: (num of batch, 1) or (num of batch, )
        topk: list of returned k

    refer: https://github.com/pytorch/examples/blob/master/imagenet/main.py
    """
    maxK = max(topk)  # get k in top-k
    batch_size = target.size(0)

    _, pred = output.topk(k=maxK, dim=1, largest=True, sorted=True)  # pred: [num of batch, k]
    pred = pred.t()  # pred: [k, num of batch]

    # [1, num of batch] -> [k, num_of_batch] : bool
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res  # np.shape(res): [k, 1]
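A quick illustration of accuracy() on random logits (illustrative only, not part of the commit):

import torch
from libs.metric.accuracy import accuracy

logits = torch.randn(32, 10)          # (batch, num_classes)
labels = torch.randint(0, 10, (32,))  # (batch,)
top1, top5 = accuracy(logits, labels, topk=(1, 5))
print(f"top-1: {top1.item():.2f}%, top-5: {top5.item():.2f}%")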
libs/metric/clip_score/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .openaiCLIP_loss import CLIPScoreWrapper

__all__ = ['CLIPScoreWrapper']
libs/metric/clip_score/openaiCLIP_loss.py
ADDED
@@ -0,0 +1,304 @@
from typing import Union, List, Tuple
from collections import OrderedDict
from functools import partial

import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms


class CLIPScoreWrapper(nn.Module):

    def __init__(self,
                 clip_model_name: str,
                 download_root: str = None,
                 device: torch.device = "cuda" if torch.cuda.is_available() else "cpu",
                 jit: bool = False,
                 # additional params
                 visual_score: bool = False,
                 feats_loss_type: str = None,
                 feats_loss_weights: List[float] = None,
                 fc_loss_weight: float = None,
                 context_length: int = 77):
        super().__init__()

        import clip  # local import

        # check model info
        self.clip_model_name = clip_model_name
        self.device = device
        self.available_models = clip.available_models()
        assert clip_model_name in self.available_models, f"A model backbone: {clip_model_name} that does not exist"

        # load CLIP
        self.model, self.preprocess = clip.load(clip_model_name, device, jit=jit, download_root=download_root)
        self.model.eval()

        # load tokenize
        self.tokenize_fn = partial(clip.tokenize, context_length=context_length)

        # load CLIP visual
        self.visual_encoder = VisualEncoderWrapper(self.model, clip_model_name).to(device)
        self.visual_encoder.eval()

        # check loss weights
        self.visual_score = visual_score
        if visual_score:
            assert feats_loss_type in ["l1", "l2", "cosine"], f"{feats_loss_type} is not exist."
            if clip_model_name.startswith("ViT"): assert len(feats_loss_weights) == 12
            if clip_model_name.startswith("RN"): assert len(feats_loss_weights) == 5

            # load visual loss wrapper
            self.visual_loss_fn = CLIPVisualLossWrapper(self.visual_encoder, feats_loss_type,
                                                        feats_loss_weights,
                                                        fc_loss_weight)

    @property
    def input_resolution(self):
        return self.model.visual.input_resolution  # default: 224

    @property
    def resize(self):  # Resize only
        return transforms.Compose([self.preprocess.transforms[0]])

    @property
    def normalize(self):
        return transforms.Compose([
            self.preprocess.transforms[0],   # Resize
            self.preprocess.transforms[1],   # CenterCrop
            self.preprocess.transforms[-1],  # Normalize
        ])

    @property
    def norm_(self):  # Normalize only
        return transforms.Compose([self.preprocess.transforms[-1]])

    def encode_image_layer_wise(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
        semantic_vec, feature_maps = self.visual_encoder(x)
        return semantic_vec, feature_maps

    def encode_text(self, text: Union[str, List[str]], norm: bool = True) -> torch.Tensor:
        tokens = self.tokenize_fn(text).to(self.device)
        text_features = self.model.encode_text(tokens)
        if norm:
            text_features = text_features.mean(axis=0, keepdim=True)
            text_features_norm = text_features / text_features.norm(dim=-1, keepdim=True)
            return text_features_norm
        return text_features

    def encode_image(self, image: torch.Tensor, norm: bool = True) -> torch.Tensor:
        image_features = self.model.encode_image(image)
        if norm:
            image_features_norm = image_features / image_features.norm(dim=-1, keepdim=True)
            return image_features_norm
        return image_features

    @torch.no_grad()
    def predict(self,
                image: torch.Tensor,
                text: Union[str, List[str]]) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray]:
        image_features = self.model.encode_image(image)
        text_tokenize = self.tokenize_fn(text).to(self.device)
        text_features = self.model.encode_text(text_tokenize)
        logits_per_image, logits_per_text = self.model(image, text)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
        return image_features, text_features, probs

    def compute_text_visual_distance(
            self, image: torch.Tensor, text: Union[str, List[str]]
    ) -> torch.Tensor:
        image_features = self.model.encode_image(image)
        text_tokenize = self.tokenize_fn(text).to(self.device)
        with torch.no_grad():
            text_features = self.model.encode_text(text_tokenize)

        image_features_norm = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features_norm = text_features / text_features.norm(dim=-1, keepdim=True)
        # loss = - (image_features_norm @ text_features_norm.T)
        loss = 1 - torch.cosine_similarity(image_features_norm, text_features_norm, dim=1)
        return loss.mean()

    def directional_text_visual_distance(self, src_text, src_img, tar_text, tar_img):
        src_image_features = self.model.encode_image(src_img).detach()
        tar_image_features = self.model.encode_image(tar_img)
        src_text_tokenize = self.tokenize_fn(src_text).to(self.device)
        tar_text_tokenize = self.tokenize_fn(tar_text).to(self.device)
        with torch.no_grad():
            src_text_features = self.model.encode_text(src_text_tokenize)
            tar_text_features = self.model.encode_text(tar_text_tokenize)

        delta_image_features = tar_image_features - src_image_features
        delta_text_features = tar_text_features - src_text_features

        # # avold zero divisor
        # delta_image_features_norm = delta_image_features / delta_image_features.norm(dim=-1, keepdim=True)
        # delta_text_features_norm = delta_text_features / delta_text_features.norm(dim=-1, keepdim=True)

        loss = 1 - torch.cosine_similarity(delta_image_features, delta_text_features, dim=1, eps=1e-3)
        return loss.mean()

    def compute_visual_distance(
            self, x: torch.Tensor, y: torch.Tensor, clip_norm: bool = True,
    ) -> Tuple[torch.Tensor, List]:
        # return a fc loss and the list of feat loss
        assert self.visual_score is True
        assert x.shape == y.shape
        assert x.shape[-1] == self.input_resolution and x.shape[-2] == self.input_resolution
        assert y.shape[-1] == self.input_resolution and y.shape[-2] == self.input_resolution

        if clip_norm:
            return self.visual_loss_fn(self.normalize(x), self.normalize(y))
        else:
            return self.visual_loss_fn(x, y)


class VisualEncoderWrapper(nn.Module):
    """
    semantic features and layer by layer feature maps are obtained from CLIP visual encoder.
    """

    def __init__(self, clip_model: nn.Module, clip_model_name: str):
        super().__init__()
        self.clip_model = clip_model
        self.clip_model_name = clip_model_name

        if clip_model_name.startswith("ViT"):
            self.feature_maps = OrderedDict()
            for i in range(12):  # 12 ResBlocks in ViT visual transformer
                self.clip_model.visual.transformer.resblocks[i].register_forward_hook(
                    self.make_hook(i)
                )

        if clip_model_name.startswith("RN"):
            layers = list(self.clip_model.visual.children())
            init_layers = torch.nn.Sequential(*layers)[:8]
            self.layer1 = layers[8]
            self.layer2 = layers[9]
            self.layer3 = layers[10]
            self.layer4 = layers[11]
            self.att_pool2d = layers[12]

    def make_hook(self, name):
        def hook(module, input, output):
            if len(output.shape) == 3:
                # LND -> NLD (B, 77, 768)
                self.feature_maps[name] = output.permute(1, 0, 2)
            else:
                self.feature_maps[name] = output

        return hook

    def _forward_vit(self, x: torch.Tensor) -> Tuple[torch.Tensor, List]:
        fc_feature = self.clip_model.encode_image(x).float()
        feature_maps = [self.feature_maps[k] for k in range(12)]

        # fc_feature len: 1 ,feature_maps len: 12
        return fc_feature, feature_maps

    def _forward_resnet(self, x: torch.Tensor) -> Tuple[torch.Tensor, List]:
        def stem(m, x):
            for conv, bn, relu in [(m.conv1, m.bn1, m.relu1), (m.conv2, m.bn2, m.relu2), (m.conv3, m.bn3, m.relu3)]:
                x = torch.relu(bn(conv(x)))
            x = m.avgpool(x)
            return x

        x = x.type(self.clip_model.visual.conv1.weight.dtype)
        x = stem(self.clip_model.visual, x)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        y = self.att_pool2d(x4)

        # fc_features len: 1 ,feature_maps len: 5
        return y, [x, x1, x2, x3, x4]

    def forward(self, x) -> Tuple[torch.Tensor, List[torch.Tensor]]:
        if self.clip_model_name.startswith("ViT"):
            fc_feat, visual_feat_maps = self._forward_vit(x)
        if self.clip_model_name.startswith("RN"):
            fc_feat, visual_feat_maps = self._forward_resnet(x)

        return fc_feat, visual_feat_maps


class CLIPVisualLossWrapper(nn.Module):
    """
    Visual Feature Loss + FC loss
    """

    def __init__(
            self,
            visual_encoder: nn.Module,
            feats_loss_type: str = None,
            feats_loss_weights: List[float] = None,
            fc_loss_weight: float = None,
    ):
        super().__init__()
        self.visual_encoder = visual_encoder
        self.feats_loss_weights = feats_loss_weights
        self.fc_loss_weight = fc_loss_weight

        self.layer_criterion = layer_wise_distance(feats_loss_type)

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        x_fc_feature, x_feat_maps = self.visual_encoder(x)
        y_fc_feature, y_feat_maps = self.visual_encoder(y)

        # visual feature loss
        if sum(self.feats_loss_weights) == 0:
            feats_loss_list = [torch.tensor(0, device=x.device)]
        else:
            feats_loss = self.layer_criterion(x_feat_maps, y_feat_maps, self.visual_encoder.clip_model_name)
            feats_loss_list = []
            for layer, w in enumerate(self.feats_loss_weights):
                if w:
                    feats_loss_list.append(feats_loss[layer] * w)

        # visual fc loss, default: cosine similarity
        if self.fc_loss_weight == 0:
            fc_loss = torch.tensor(0, device=x.device)
        else:
            fc_loss = (1 - torch.cosine_similarity(x_fc_feature, y_fc_feature, dim=1)).mean()
            fc_loss = fc_loss * self.fc_loss_weight

        return fc_loss, feats_loss_list


#################################################################################
#                              layer wise metric                                #
#################################################################################

def layer_wise_distance(metric_name: str):
    return {
        "l1": l1_layer_wise,
        "l2": l2_layer_wise,
        "cosine": cosine_layer_wise
    }.get(metric_name.lower())


def l2_layer_wise(x_features, y_features, clip_model_name):
    return [
        torch.square(x_conv - y_conv).mean()
        for x_conv, y_conv in zip(x_features, y_features)
    ]


def l1_layer_wise(x_features, y_features, clip_model_name):
    return [
        torch.abs(x_conv - y_conv).mean()
        for x_conv, y_conv in zip(x_features, y_features)
    ]


def cosine_layer_wise(x_features, y_features, clip_model_name):
    if clip_model_name.startswith("RN"):
        return [
            (1 - torch.cosine_similarity(x_conv, y_conv, dim=1)).mean()
            for x_conv, y_conv in zip(x_features, y_features)
        ]
    return [
        (1 - torch.cosine_similarity(x_conv, y_conv, dim=1)).mean()
        for x_conv, y_conv in zip(x_features, y_features)
    ]
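A hedged usage sketch for CLIPScoreWrapper, mirroring the `clip:` block of config/diffsketchedit.yaml. It assumes the OpenAI `clip` package is installed and the RN101 checkpoint can be downloaded; the prompt and the random tensors are placeholders for a rendered sketch and a diffusion sample:

import torch
from libs.metric.clip_score import CLIPScoreWrapper

clip_wrapper = CLIPScoreWrapper(
    clip_model_name="RN101",
    visual_score=True,
    feats_loss_type="l2",
    feats_loss_weights=[0, 0, 1.0, 1.0, 0],
    fc_loss_weight=0.1,
)

device = clip_wrapper.device
sketch = torch.rand(1, 3, 224, 224, device=device)  # rendered sketch in [0, 1]
target = torch.rand(1, 3, 224, 224, device=device)  # diffusion sample in [0, 1]

# layer-wise conv-feature losses plus the fc (semantic) loss
fc_loss, feat_losses = clip_wrapper.compute_visual_distance(sketch, target)
# cosine distance between the sketch and a text prompt
text_loss = clip_wrapper.compute_text_visual_distance(
    clip_wrapper.normalize(sketch), "a sketch of a cat")
print(fc_loss.item(), sum(feat_losses).item(), text_loss.item())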
libs/metric/lpips_origin/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .lpips import LPIPS

__all__ = ['LPIPS']
libs/metric/lpips_origin/lpips.py
ADDED
@@ -0,0 +1,184 @@
from __future__ import absolute_import

import os

import torch
import torch.nn as nn

from . import pretrained_networks as pretrained_torch_models


def spatial_average(x, keepdim=True):
    return x.mean([2, 3], keepdim=keepdim)


def upsample(x):
    return nn.Upsample(size=x.shape[2:], mode='bilinear', align_corners=False)(x)


def normalize_tensor(in_feat, eps=1e-10):
    norm_factor = torch.sqrt(torch.sum(in_feat ** 2, dim=1, keepdim=True))
    return in_feat / (norm_factor + eps)


# Learned perceptual metric
class LPIPS(nn.Module):

    def __init__(self,
                 pretrained=True,
                 net='alex',
                 version='0.1',
                 lpips=True,
                 spatial=False,
                 pnet_rand=False,
                 pnet_tune=False,
                 use_dropout=True,
                 model_path=None,
                 eval_mode=True,
                 verbose=True):
        """ Initializes a perceptual loss torch.nn.Module

        Parameters (default listed first)
        ---------------------------------
        lpips : bool
            [True] use linear layers on top of base/trunk network
            [False] means no linear layers; each layer is averaged together
        pretrained : bool
            This flag controls the linear layers, which are only in effect when lpips=True above
            [True] means linear layers are calibrated with human perceptual judgments
            [False] means linear layers are randomly initialized
        pnet_rand : bool
            [False] means trunk loaded with ImageNet classification weights
            [True] means randomly initialized trunk
        net : str
            ['alex','vgg','squeeze'] are the base/trunk networks available
        version : str
            ['v0.1'] is the default and latest
            ['v0.0'] contained a normalization bug; corresponds to old arxiv v1 (https://arxiv.org/abs/1801.03924v1)
        model_path : 'str'
            [None] is default and loads the pretrained weights from paper https://arxiv.org/abs/1801.03924v1

        The following parameters should only be changed if training the network:

        eval_mode : bool
            [True] is for test mode (default)
            [False] is for training mode
        pnet_tune
            [False] keep base/trunk frozen
            [True] tune the base/trunk network
        use_dropout : bool
            [True] to use dropout when training linear layers
            [False] for no dropout when training linear layers
        """
        super(LPIPS, self).__init__()
        if verbose:
            print('Setting up [%s] perceptual loss: trunk [%s], v[%s], spatial [%s]' %
                  ('LPIPS' if lpips else 'baseline', net, version, 'on' if spatial else 'off'))

        self.pnet_type = net
        self.pnet_tune = pnet_tune
        self.pnet_rand = pnet_rand
        self.spatial = spatial
        self.lpips = lpips  # false means baseline of just averaging all layers
        self.version = version
        self.scaling_layer = ScalingLayer()

        if self.pnet_type in ['vgg', 'vgg16']:
            net_type = pretrained_torch_models.vgg16
            self.chns = [64, 128, 256, 512, 512]
        elif self.pnet_type == 'alex':
            net_type = pretrained_torch_models.alexnet
            self.chns = [64, 192, 384, 256, 256]
        elif self.pnet_type == 'squeeze':
            net_type = pretrained_torch_models.squeezenet
            self.chns = [64, 128, 256, 384, 384, 512, 512]
        self.L = len(self.chns)

        self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)

        if lpips:
            self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
            self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
            self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
            self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
            self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
            self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
            if self.pnet_type == 'squeeze':  # 7 layers for squeezenet
                self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
                self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
                self.lins += [self.lin5, self.lin6]
            self.lins = nn.ModuleList(self.lins)

            if pretrained:
                if model_path is None:
                    model_path = os.path.join(
                        os.path.dirname(os.path.abspath(__file__)),
                        f"weights/v{version}/{net}.pth"
                    )
                if verbose:
                    print('Loading model from: %s' % model_path)
                self.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)

        if eval_mode:
            self.eval()

    def forward(self, in0, in1, return_per_layer=False, normalize=False):
        if normalize:  # turn on this flag if input is [0,1] so it can be adjusted to [-1, 1]
            in0 = 2 * in0 - 1
            in1 = 2 * in1 - 1

        # Noting: v0.0 - original release had a bug, where input was not scaled
        if self.version == '0.1':
            in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1))
        else:
            in0_input, in1_input = in0, in1

        # model forward
        outs0, outs1 = self.net.forward(in0_input), self.net.forward(in1_input)

        feats0, feats1, diffs = {}, {}, {}
        for kk in range(self.L):
            feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
            diffs[kk] = (feats0[kk] - feats1[kk]) ** 2

        if self.lpips:
            if self.spatial:
                res = [upsample(self.lins[kk](diffs[kk])) for kk in range(self.L)]
            else:
                res = [spatial_average(self.lins[kk](diffs[kk]), keepdim=True) for kk in range(self.L)]
        else:
            if self.spatial:
                res = [upsample(diffs[kk].sum(dim=1, keepdim=True)) for kk in range(self.L)]
            else:
                res = [spatial_average(diffs[kk].sum(dim=1, keepdim=True), keepdim=True) for kk in range(self.L)]

        loss = sum(res)

        if return_per_layer:
            return loss, res
        else:
            return loss


class ScalingLayer(nn.Module):
    def __init__(self):
        super(ScalingLayer, self).__init__()
        self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
        self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])

    def forward(self, inp):
        return (inp - self.shift) / self.scale


class NetLinLayer(nn.Module):
    """A single linear layer which does a 1x1 conv"""

    def __init__(self, chn_in, chn_out=1, use_dropout=False):
        super(NetLinLayer, self).__init__()

        layers = [nn.Dropout(), ] if (use_dropout) else []
        layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False), ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)
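A minimal sketch of using this LPIPS metric as shipped here, assuming the bundled weights/v0.1/vgg.pth load correctly and that pretrained_networks.vgg16 is available (its definition is cut off in this truncated view). Inputs are expected in [-1, 1]; pass normalize=True for [0, 1] tensors:

import torch
from libs.metric.lpips_origin import LPIPS

lpips_fn = LPIPS(net='vgg').eval()       # matches perceptual.lpips_net in the config
img0 = torch.rand(1, 3, 224, 224)        # values in [0, 1]
img1 = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    d = lpips_fn(img0, img1, normalize=True)  # spatially averaged distance, shape (1, 1, 1, 1)
print(d.item())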
libs/metric/lpips_origin/pretrained_networks.py
ADDED
@@ -0,0 +1,196 @@
1 |
+
from collections import namedtuple
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torchvision.models as tv_models
|
5 |
+
|
6 |
+
|
7 |
+
class squeezenet(torch.nn.Module):
|
8 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
9 |
+
super(squeezenet, self).__init__()
|
10 |
+
weights = tv_models.SqueezeNet1_1_Weights.IMAGENET1K_V1 if pretrained else None
pretrained_features = tv_models.squeezenet1_1(weights=weights).features
|
11 |
+
self.slice1 = torch.nn.Sequential()
|
12 |
+
self.slice2 = torch.nn.Sequential()
|
13 |
+
self.slice3 = torch.nn.Sequential()
|
14 |
+
self.slice4 = torch.nn.Sequential()
|
15 |
+
self.slice5 = torch.nn.Sequential()
|
16 |
+
self.slice6 = torch.nn.Sequential()
|
17 |
+
self.slice7 = torch.nn.Sequential()
|
18 |
+
self.N_slices = 7
|
19 |
+
for x in range(2):
|
20 |
+
self.slice1.add_module(str(x), pretrained_features[x])
|
21 |
+
for x in range(2, 5):
|
22 |
+
self.slice2.add_module(str(x), pretrained_features[x])
|
23 |
+
for x in range(5, 8):
|
24 |
+
self.slice3.add_module(str(x), pretrained_features[x])
|
25 |
+
for x in range(8, 10):
|
26 |
+
self.slice4.add_module(str(x), pretrained_features[x])
|
27 |
+
for x in range(10, 11):
|
28 |
+
self.slice5.add_module(str(x), pretrained_features[x])
|
29 |
+
for x in range(11, 12):
|
30 |
+
self.slice6.add_module(str(x), pretrained_features[x])
|
31 |
+
for x in range(12, 13):
|
32 |
+
self.slice7.add_module(str(x), pretrained_features[x])
|
33 |
+
if not requires_grad:
|
34 |
+
for param in self.parameters():
|
35 |
+
param.requires_grad = False
|
36 |
+
|
37 |
+
def forward(self, X):
|
38 |
+
h = self.slice1(X)
|
39 |
+
h_relu1 = h
|
40 |
+
h = self.slice2(h)
|
41 |
+
h_relu2 = h
|
42 |
+
h = self.slice3(h)
|
43 |
+
h_relu3 = h
|
44 |
+
h = self.slice4(h)
|
45 |
+
h_relu4 = h
|
46 |
+
h = self.slice5(h)
|
47 |
+
h_relu5 = h
|
48 |
+
h = self.slice6(h)
|
49 |
+
h_relu6 = h
|
50 |
+
h = self.slice7(h)
|
51 |
+
h_relu7 = h
|
52 |
+
vgg_outputs = namedtuple("SqueezeOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5', 'relu6', 'relu7'])
|
53 |
+
out = squeeze_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)
|
54 |
+
|
55 |
+
return out
|
56 |
+
|
57 |
+
|
58 |
+
class alexnet(torch.nn.Module):
|
59 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
60 |
+
super(alexnet, self).__init__()
|
61 |
+
weights = tv_models.AlexNet_Weights.IMAGENET1K_V1 if pretrained else None
|
62 |
+
alexnet_pretrained_features = tv_models.alexnet(weights=weights).features
|
63 |
+
self.slice1 = torch.nn.Sequential()
|
64 |
+
self.slice2 = torch.nn.Sequential()
|
65 |
+
self.slice3 = torch.nn.Sequential()
|
66 |
+
self.slice4 = torch.nn.Sequential()
|
67 |
+
self.slice5 = torch.nn.Sequential()
|
68 |
+
self.N_slices = 5
|
69 |
+
for x in range(2):
|
70 |
+
self.slice1.add_module(str(x), alexnet_pretrained_features[x])
|
71 |
+
for x in range(2, 5):
|
72 |
+
self.slice2.add_module(str(x), alexnet_pretrained_features[x])
|
73 |
+
for x in range(5, 8):
|
74 |
+
self.slice3.add_module(str(x), alexnet_pretrained_features[x])
|
75 |
+
for x in range(8, 10):
|
76 |
+
self.slice4.add_module(str(x), alexnet_pretrained_features[x])
|
77 |
+
for x in range(10, 12):
|
78 |
+
self.slice5.add_module(str(x), alexnet_pretrained_features[x])
|
79 |
+
|
80 |
+
if not requires_grad:
|
81 |
+
for param in self.parameters():
|
82 |
+
param.requires_grad = False
|
83 |
+
|
84 |
+
def forward(self, X):
|
85 |
+
h = self.slice1(X)
|
86 |
+
h_relu1 = h
|
87 |
+
h = self.slice2(h)
|
88 |
+
h_relu2 = h
|
89 |
+
h = self.slice3(h)
|
90 |
+
h_relu3 = h
|
91 |
+
h = self.slice4(h)
|
92 |
+
h_relu4 = h
|
93 |
+
h = self.slice5(h)
|
94 |
+
h_relu5 = h
|
95 |
+
alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
|
96 |
+
out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
|
97 |
+
|
98 |
+
return out
|
99 |
+
|
100 |
+
|
101 |
+
class vgg16(torch.nn.Module):
|
102 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
103 |
+
super(vgg16, self).__init__()
|
104 |
+
weights = tv_models.VGG16_Weights.IMAGENET1K_V1 if pretrained else None
|
105 |
+
vgg_pretrained_features = tv_models.vgg16(weights=weights).features
|
106 |
+
self.slice1 = torch.nn.Sequential()
|
107 |
+
self.slice2 = torch.nn.Sequential()
|
108 |
+
self.slice3 = torch.nn.Sequential()
|
109 |
+
self.slice4 = torch.nn.Sequential()
|
110 |
+
self.slice5 = torch.nn.Sequential()
|
111 |
+
self.N_slices = 5
|
112 |
+
for x in range(4):
|
113 |
+
self.slice1.add_module(str(x), vgg_pretrained_features[x])
|
114 |
+
for x in range(4, 9):
|
115 |
+
self.slice2.add_module(str(x), vgg_pretrained_features[x])
|
116 |
+
for x in range(9, 16):
|
117 |
+
self.slice3.add_module(str(x), vgg_pretrained_features[x])
|
118 |
+
for x in range(16, 23):
|
119 |
+
self.slice4.add_module(str(x), vgg_pretrained_features[x])
|
120 |
+
for x in range(23, 30):
|
121 |
+
self.slice5.add_module(str(x), vgg_pretrained_features[x])
|
122 |
+
|
123 |
+
if not requires_grad:
|
124 |
+
for param in self.parameters():
|
125 |
+
param.requires_grad = False
|
126 |
+
|
127 |
+
def forward(self, X):
|
128 |
+
h = self.slice1(X)
|
129 |
+
h_relu1_2 = h
|
130 |
+
h = self.slice2(h)
|
131 |
+
h_relu2_2 = h
|
132 |
+
h = self.slice3(h)
|
133 |
+
h_relu3_3 = h
|
134 |
+
h = self.slice4(h)
|
135 |
+
h_relu4_3 = h
|
136 |
+
h = self.slice5(h)
|
137 |
+
h_relu5_3 = h
|
138 |
+
vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
|
139 |
+
out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
|
140 |
+
|
141 |
+
return out
|
142 |
+
|
143 |
+
|
144 |
+
class resnet(torch.nn.Module):
|
145 |
+
def __init__(self, requires_grad=False, pretrained=True, num=18):
|
146 |
+
super(resnet, self).__init__()
|
147 |
+
|
148 |
+
if num == 18:
|
149 |
+
weights = tv_models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
|
150 |
+
self.net = tv_models.resnet18(weights=weights)
|
151 |
+
elif num == 34:
|
152 |
+
weights = tv_models.ResNet34_Weights.IMAGENET1K_V1 if pretrained else None
|
153 |
+
self.net = tv_models.resnet34(weights=weights)
|
154 |
+
elif num == 50:
|
155 |
+
weights = tv_models.ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
|
156 |
+
self.net = tv_models.resnet50(weights=weights)
|
157 |
+
elif num == 101:
|
158 |
+
weights = tv_models.ResNet101_Weights.IMAGENET1K_V2 if pretrained else None
|
159 |
+
self.net = tv_models.resnet101(weights=weights)
|
160 |
+
elif num == 152:
|
161 |
+
weights = tv_models.ResNet152_Weights.IMAGENET1K_V2 if pretrained else None
|
162 |
+
self.net = tv_models.resnet152(weights=weights)
|
163 |
+
self.N_slices = 5
|
164 |
+
|
165 |
+
if not requires_grad:
|
166 |
+
for param in self.net.parameters():
|
167 |
+
param.requires_grad = False
|
168 |
+
|
169 |
+
self.conv1 = self.net.conv1
|
170 |
+
self.bn1 = self.net.bn1
|
171 |
+
self.relu = self.net.relu
|
172 |
+
self.maxpool = self.net.maxpool
|
173 |
+
self.layer1 = self.net.layer1
|
174 |
+
self.layer2 = self.net.layer2
|
175 |
+
self.layer3 = self.net.layer3
|
176 |
+
self.layer4 = self.net.layer4
|
177 |
+
|
178 |
+
def forward(self, X):
|
179 |
+
h = self.conv1(X)
|
180 |
+
h = self.bn1(h)
|
181 |
+
h = self.relu(h)
|
182 |
+
h_relu1 = h
|
183 |
+
h = self.maxpool(h)
|
184 |
+
h = self.layer1(h)
|
185 |
+
h_conv2 = h
|
186 |
+
h = self.layer2(h)
|
187 |
+
h_conv3 = h
|
188 |
+
h = self.layer3(h)
|
189 |
+
h_conv4 = h
|
190 |
+
h = self.layer4(h)
|
191 |
+
h_conv5 = h
|
192 |
+
|
193 |
+
outputs = namedtuple("Outputs", ['relu1', 'conv2', 'conv3', 'conv4', 'conv5'])
|
194 |
+
out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
|
195 |
+
|
196 |
+
return out
|
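For reference, a minimal sketch of how the backbone wrappers above are typically used; the import path assumes the repository root is on PYTHONPATH, and the shapes are illustrative.

import torch
from libs.metric.lpips_origin.pretrained_networks import vgg16

backbone = vgg16(requires_grad=False, pretrained=True)
x = torch.rand(1, 3, 224, 224)
feats = backbone(x)                      # namedtuple with fields relu1_2 ... relu5_3
print([tuple(f.shape) for f in feats])   # channel depth grows while spatial size shrinks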
libs/metric/lpips_origin/weights/v0.1/alex.pth
ADDED
Binary file (6.01 kB)
libs/metric/lpips_origin/weights/v0.1/squeeze.pth
ADDED
Binary file (10.8 kB)
libs/metric/lpips_origin/weights/v0.1/vgg.pth
ADDED
Binary file (7.29 kB)
libs/metric/piq/__init__.py
ADDED
@@ -0,0 +1,2 @@
1 |
+
# install: pip install piq
|
2 |
+
# repo: https://github.com/photosynthesis-team/piq
|
libs/metric/piq/functional/__init__.py
ADDED
@@ -0,0 +1,15 @@
1 |
+
from .base import ifftshift, get_meshgrid, similarity_map, gradient_map, pow_for_complex, crop_patches
|
2 |
+
from .colour_conversion import rgb2lmn, rgb2xyz, xyz2lab, rgb2lab, rgb2yiq, rgb2lhm
|
3 |
+
from .filters import haar_filter, hann_filter, scharr_filter, prewitt_filter, gaussian_filter
|
4 |
+
from .filters import binomial_filter1d, average_filter2d
|
5 |
+
from .layers import L2Pool2d
|
6 |
+
from .resize import imresize
|
7 |
+
|
8 |
+
__all__ = [
|
9 |
+
'ifftshift', 'get_meshgrid', 'similarity_map', 'gradient_map', 'pow_for_complex', 'crop_patches',
|
10 |
+
'rgb2lmn', 'rgb2xyz', 'xyz2lab', 'rgb2lab', 'rgb2yiq', 'rgb2lhm',
|
11 |
+
'haar_filter', 'hann_filter', 'scharr_filter', 'prewitt_filter', 'gaussian_filter',
|
12 |
+
'binomial_filter1d', 'average_filter2d',
|
13 |
+
'L2Pool2d',
|
14 |
+
'imresize',
|
15 |
+
]
|
libs/metric/piq/functional/base.py
ADDED
@@ -0,0 +1,111 @@
1 |
+
r"""General purpose functions"""
|
2 |
+
from typing import Tuple, Union, Optional
|
3 |
+
import torch
|
4 |
+
from ..utils import _parse_version
|
5 |
+
|
6 |
+
|
7 |
+
def ifftshift(x: torch.Tensor) -> torch.Tensor:
|
8 |
+
r""" Similar to np.fft.ifftshift but applies to PyTorch Tensors"""
|
9 |
+
shift = [-(ax // 2) for ax in x.size()]
|
10 |
+
return torch.roll(x, shift, tuple(range(len(shift))))
|
11 |
+
|
12 |
+
|
13 |
+
def get_meshgrid(size: Tuple[int, int], device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
14 |
+
r"""Return coordinate grid matrices centered at zero point.
|
15 |
+
Args:
|
16 |
+
size: Shape of meshgrid to create
|
17 |
+
device: device to use for creation
|
18 |
+
dtype: dtype to use for creation
|
19 |
+
Returns:
|
20 |
+
Meshgrid of size on device with dtype values.
|
21 |
+
"""
|
22 |
+
if size[0] % 2:
|
23 |
+
# Odd
|
24 |
+
x = torch.arange(-(size[0] - 1) / 2, size[0] / 2, device=device, dtype=dtype) / (size[0] - 1)
|
25 |
+
else:
|
26 |
+
# Even
|
27 |
+
x = torch.arange(- size[0] / 2, size[0] / 2, device=device, dtype=dtype) / size[0]
|
28 |
+
|
29 |
+
if size[1] % 2:
|
30 |
+
# Odd
|
31 |
+
y = torch.arange(-(size[1] - 1) / 2, size[1] / 2, device=device, dtype=dtype) / (size[1] - 1)
|
32 |
+
else:
|
33 |
+
# Even
|
34 |
+
y = torch.arange(- size[1] / 2, size[1] / 2, device=device, dtype=dtype) / size[1]
|
35 |
+
# Use indexing param depending on torch version
|
36 |
+
recommended_torch_version = _parse_version("1.10.0")
|
37 |
+
torch_version = _parse_version(torch.__version__)
|
38 |
+
if len(torch_version) > 0 and torch_version >= recommended_torch_version:
|
39 |
+
return torch.meshgrid(x, y, indexing='ij')
|
40 |
+
return torch.meshgrid(x, y)
|
41 |
+
|
42 |
+
|
43 |
+
def similarity_map(map_x: torch.Tensor, map_y: torch.Tensor, constant: float, alpha: float = 0.0) -> torch.Tensor:
|
44 |
+
r""" Compute similarity_map between two tensors using Dice-like equation.
|
45 |
+
|
46 |
+
Args:
|
47 |
+
map_x: Tensor with map to be compared
|
48 |
+
map_y: Tensor with map to be compared
|
49 |
+
constant: Used for numerical stability
|
50 |
+
alpha: Masking coefficient. Subtracts `alpha` * map_x * map_y from both the numerator and the denominator
|
51 |
+
"""
|
52 |
+
return (2.0 * map_x * map_y - alpha * map_x * map_y + constant) / \
|
53 |
+
(map_x ** 2 + map_y ** 2 - alpha * map_x * map_y + constant)
|
54 |
+
|
55 |
+
|
56 |
+
def gradient_map(x: torch.Tensor, kernels: torch.Tensor) -> torch.Tensor:
|
57 |
+
r""" Compute gradient map for a given tensor and stack of kernels.
|
58 |
+
|
59 |
+
Args:
|
60 |
+
x: Tensor with shape (N, C, H, W).
|
61 |
+
kernels: Stack of tensors for gradient computation with shape (k_N, k_H, k_W)
|
62 |
+
Returns:
|
63 |
+
Gradients of x per-channel with shape (N, C, H, W)
|
64 |
+
"""
|
65 |
+
padding = kernels.size(-1) // 2
|
66 |
+
grads = torch.nn.functional.conv2d(x, kernels, padding=padding)
|
67 |
+
|
68 |
+
return torch.sqrt(torch.sum(grads ** 2, dim=-3, keepdim=True))
|
69 |
+
|
70 |
+
|
71 |
+
def pow_for_complex(base: torch.Tensor, exp: Union[int, float]) -> torch.Tensor:
|
72 |
+
r""" Takes the power of each element in a 4D tensor with negative values or 5D tensor with complex values.
|
73 |
+
Complex numbers are represented by modulus and argument: r * \exp(i * \phi).
|
74 |
+
|
75 |
+
It is likely to become redundant with the introduction of torch.ComplexTensor.
|
76 |
+
|
77 |
+
Args:
|
78 |
+
base: Tensor with shape (N, C, H, W) or (N, C, H, W, 2).
|
79 |
+
exp: Exponent
|
80 |
+
Returns:
|
81 |
+
Complex tensor with shape (N, C, H, W, 2).
|
82 |
+
"""
|
83 |
+
if base.dim() == 4:
|
84 |
+
x_complex_r = base.abs()
|
85 |
+
x_complex_phi = torch.atan2(torch.zeros_like(base), base)
|
86 |
+
elif base.dim() == 5 and base.size(-1) == 2:
|
87 |
+
x_complex_r = base.pow(2).sum(dim=-1).sqrt()
|
88 |
+
x_complex_phi = torch.atan2(base[..., 1], base[..., 0])
|
89 |
+
else:
|
90 |
+
raise ValueError(f'Expected real or complex tensor, got {base.size()}')
|
91 |
+
|
92 |
+
x_complex_pow_r = x_complex_r ** exp
|
93 |
+
x_complex_pow_phi = x_complex_phi * exp
|
94 |
+
x_real_pow = x_complex_pow_r * torch.cos(x_complex_pow_phi)
|
95 |
+
x_imag_pow = x_complex_pow_r * torch.sin(x_complex_pow_phi)
|
96 |
+
return torch.stack((x_real_pow, x_imag_pow), dim=-1)
|
97 |
+
|
98 |
+
|
99 |
+
def crop_patches(x: torch.Tensor, size=64, stride=32) -> torch.Tensor:
|
100 |
+
r"""Crop tensor with images into small patches
|
101 |
+
Args:
|
102 |
+
x: Tensor with shape (N, C, H, W), expected to be images-like entities
|
103 |
+
size: Size of a square patch
|
104 |
+
stride: Step between patches
|
105 |
+
"""
|
106 |
+
assert (x.shape[2] >= size) and (x.shape[3] >= size), \
|
107 |
+
f"Images must be bigger than patch size. Got ({x.shape[2], x.shape[3]}) and ({size}, {size})"
|
108 |
+
channels = x.shape[1]
|
109 |
+
patches = x.unfold(1, channels, channels).unfold(2, size, stride).unfold(3, size, stride)
|
110 |
+
patches = patches.reshape(-1, channels, size, size)
|
111 |
+
return patches
|
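A short sketch of crop_patches from the helpers above (import path as in this commit; shapes illustrative):

import torch
from libs.metric.piq.functional import crop_patches

x = torch.rand(2, 3, 128, 128)
patches = crop_patches(x, size=64, stride=32)
print(patches.shape)   # torch.Size([18, 3, 64, 64]): 3 x 3 patches per image, 2 images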
libs/metric/piq/functional/colour_conversion.py
ADDED
@@ -0,0 +1,136 @@
1 |
+
r"""Colour space conversion functions"""
|
2 |
+
from typing import Union, Dict
|
3 |
+
import torch
|
4 |
+
|
5 |
+
|
6 |
+
def rgb2lmn(x: torch.Tensor) -> torch.Tensor:
|
7 |
+
r"""Convert a batch of RGB images to a batch of LMN images
|
8 |
+
|
9 |
+
Args:
|
10 |
+
x: Batch of images with shape (N, 3, H, W). RGB colour space.
|
11 |
+
|
12 |
+
Returns:
|
13 |
+
Batch of images with shape (N, 3, H, W). LMN colour space.
|
14 |
+
"""
|
15 |
+
weights_rgb_to_lmn = torch.tensor([[0.06, 0.63, 0.27],
|
16 |
+
[0.30, 0.04, -0.35],
|
17 |
+
[0.34, -0.6, 0.17]], dtype=x.dtype, device=x.device).t()
|
18 |
+
x_lmn = torch.matmul(x.permute(0, 2, 3, 1), weights_rgb_to_lmn).permute(0, 3, 1, 2)
|
19 |
+
return x_lmn
|
20 |
+
|
21 |
+
|
22 |
+
def rgb2xyz(x: torch.Tensor) -> torch.Tensor:
|
23 |
+
r"""Convert a batch of RGB images to a batch of XYZ images
|
24 |
+
|
25 |
+
Args:
|
26 |
+
x: Batch of images with shape (N, 3, H, W). RGB colour space.
|
27 |
+
|
28 |
+
Returns:
|
29 |
+
Batch of images with shape (N, 3, H, W). XYZ colour space.
|
30 |
+
"""
|
31 |
+
mask_below = (x <= 0.04045).type(x.dtype)
|
32 |
+
mask_above = (x > 0.04045).type(x.dtype)
|
33 |
+
|
34 |
+
tmp = x / 12.92 * mask_below + torch.pow((x + 0.055) / 1.055, 2.4) * mask_above
|
35 |
+
|
36 |
+
weights_rgb_to_xyz = torch.tensor([[0.4124564, 0.3575761, 0.1804375],
|
37 |
+
[0.2126729, 0.7151522, 0.0721750],
|
38 |
+
[0.0193339, 0.1191920, 0.9503041]], dtype=x.dtype, device=x.device)
|
39 |
+
|
40 |
+
x_xyz = torch.matmul(tmp.permute(0, 2, 3, 1), weights_rgb_to_xyz.t()).permute(0, 3, 1, 2)
|
41 |
+
return x_xyz
|
42 |
+
|
43 |
+
|
44 |
+
def xyz2lab(x: torch.Tensor, illuminant: str = 'D50', observer: str = '2') -> torch.Tensor:
|
45 |
+
r"""Convert a batch of XYZ images to a batch of LAB images
|
46 |
+
|
47 |
+
Args:
|
48 |
+
x: Batch of images with shape (N, 3, H, W). XYZ colour space.
|
49 |
+
illuminant: {“A”, “D50”, “D55”, “D65”, “D75”, “E”}, optional. The name of the illuminant.
|
50 |
+
observer: {“2”, “10”}, optional. The aperture angle of the observer.
|
51 |
+
|
52 |
+
Returns:
|
53 |
+
Batch of images with shape (N, 3, H, W). LAB colour space.
|
54 |
+
"""
|
55 |
+
epsilon = 0.008856
|
56 |
+
kappa = 903.3
|
57 |
+
illuminants: Dict[str, Dict] = \
|
58 |
+
{"A": {'2': (1.098466069456375, 1, 0.3558228003436005),
|
59 |
+
'10': (1.111420406956693, 1, 0.3519978321919493)},
|
60 |
+
"D50": {'2': (0.9642119944211994, 1, 0.8251882845188288),
|
61 |
+
'10': (0.9672062750333777, 1, 0.8142801513128616)},
|
62 |
+
"D55": {'2': (0.956797052643698, 1, 0.9214805860173273),
|
63 |
+
'10': (0.9579665682254781, 1, 0.9092525159847462)},
|
64 |
+
"D65": {'2': (0.95047, 1., 1.08883), # This was: `lab_ref_white`
|
65 |
+
'10': (0.94809667673716, 1, 1.0730513595166162)},
|
66 |
+
"D75": {'2': (0.9497220898840717, 1, 1.226393520724154),
|
67 |
+
'10': (0.9441713925645873, 1, 1.2064272211720228)},
|
68 |
+
"E": {'2': (1.0, 1.0, 1.0),
|
69 |
+
'10': (1.0, 1.0, 1.0)}}
|
70 |
+
|
71 |
+
illuminants_to_use = torch.tensor(illuminants[illuminant][observer],
|
72 |
+
dtype=x.dtype, device=x.device).view(1, 3, 1, 1)
|
73 |
+
|
74 |
+
tmp = x / illuminants_to_use
|
75 |
+
|
76 |
+
mask_below = (tmp <= epsilon).type(x.dtype)
|
77 |
+
mask_above = (tmp > epsilon).type(x.dtype)
|
78 |
+
tmp = torch.pow(tmp, 1. / 3.) * mask_above + (kappa * tmp + 16.) / 116. * mask_below
|
79 |
+
|
80 |
+
weights_xyz_to_lab = torch.tensor([[0, 116., 0],
|
81 |
+
[500., -500., 0],
|
82 |
+
[0, 200., -200.]], dtype=x.dtype, device=x.device)
|
83 |
+
bias_xyz_to_lab = torch.tensor([-16., 0., 0.], dtype=x.dtype, device=x.device).view(1, 3, 1, 1)
|
84 |
+
|
85 |
+
x_lab = torch.matmul(tmp.permute(0, 2, 3, 1), weights_xyz_to_lab.t()).permute(0, 3, 1, 2) + bias_xyz_to_lab
|
86 |
+
return x_lab
|
87 |
+
|
88 |
+
|
89 |
+
def rgb2lab(x: torch.Tensor, data_range: Union[int, float] = 255) -> torch.Tensor:
|
90 |
+
r"""Convert a batch of RGB images to a batch of LAB images
|
91 |
+
|
92 |
+
Args:
|
93 |
+
x: Batch of images with shape (N, 3, H, W). RGB colour space.
|
94 |
+
data_range: dynamic range of the input image.
|
95 |
+
|
96 |
+
Returns:
|
97 |
+
Batch of images with shape (N, 3, H, W). LAB colour space.
|
98 |
+
"""
|
99 |
+
return xyz2lab(rgb2xyz(x / float(data_range)))
|
100 |
+
|
101 |
+
|
102 |
+
def rgb2yiq(x: torch.Tensor) -> torch.Tensor:
|
103 |
+
r"""Convert a batch of RGB images to a batch of YIQ images
|
104 |
+
|
105 |
+
Args:
|
106 |
+
x: Batch of images with shape (N, 3, H, W). RGB colour space.
|
107 |
+
|
108 |
+
Returns:
|
109 |
+
Batch of images with shape (N, 3, H, W). YIQ colour space.
|
110 |
+
"""
|
111 |
+
yiq_weights = torch.tensor([
|
112 |
+
[0.299, 0.587, 0.114],
|
113 |
+
[0.5959, -0.2746, -0.3213],
|
114 |
+
[0.2115, -0.5227, 0.3112]], dtype=x.dtype, device=x.device).t()
|
115 |
+
x_yiq = torch.matmul(x.permute(0, 2, 3, 1), yiq_weights).permute(0, 3, 1, 2)
|
116 |
+
return x_yiq
|
117 |
+
|
118 |
+
|
119 |
+
def rgb2lhm(x: torch.Tensor) -> torch.Tensor:
|
120 |
+
r"""Convert a batch of RGB images to a batch of LHM images
|
121 |
+
|
122 |
+
Args:
|
123 |
+
x: Batch of images with shape (N, 3, H, W). RGB colour space.
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
Batch of images with shape (N, 3, H, W). LHM colour space.
|
127 |
+
|
128 |
+
Reference:
|
129 |
+
https://arxiv.org/pdf/1608.07433.pdf
|
130 |
+
"""
|
131 |
+
lhm_weights = torch.tensor([
|
132 |
+
[0.2989, 0.587, 0.114],
|
133 |
+
[0.3, 0.04, -0.35],
|
134 |
+
[0.34, -0.6, 0.17]], dtype=x.dtype, device=x.device).t()
|
135 |
+
x_lhm = torch.matmul(x.permute(0, 2, 3, 1), lhm_weights).permute(0, 3, 1, 2)
|
136 |
+
return x_lhm
|
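A minimal sketch of the colour conversions above (import path as in this commit; values illustrative):

import torch
from libs.metric.piq.functional import rgb2lab, rgb2yiq

rgb_255 = torch.rand(4, 3, 64, 64) * 255.0
lab = rgb2lab(rgb_255)            # data_range defaults to 255, so [0, 255] inputs are expected
yiq = rgb2yiq(rgb_255 / 255.0)    # rgb2yiq applies the weight matrix directly to the given values
print(lab.shape, yiq.shape)       # both (4, 3, 64, 64)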
libs/metric/piq/functional/filters.py
ADDED
@@ -0,0 +1,111 @@
1 |
+
r"""Filters for gradient computation, bluring, etc."""
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from typing import Optional
|
5 |
+
|
6 |
+
|
7 |
+
def haar_filter(kernel_size: int, device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
8 |
+
r"""Creates Haar kernel
|
9 |
+
|
10 |
+
Args:
|
11 |
+
kernel_size: size of the kernel
|
12 |
+
device: target device for kernel generation
|
13 |
+
dtype: target data type for kernel generation
|
14 |
+
Returns:
|
15 |
+
kernel: Tensor with shape (1, kernel_size, kernel_size)
|
16 |
+
"""
|
17 |
+
kernel = torch.ones((kernel_size, kernel_size), device=device, dtype=dtype) / kernel_size
|
18 |
+
kernel[kernel_size // 2:, :] = - kernel[kernel_size // 2:, :]
|
19 |
+
return kernel.unsqueeze(0)
|
20 |
+
|
21 |
+
|
22 |
+
def hann_filter(kernel_size: int, device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
23 |
+
r"""Creates Hann kernel
|
24 |
+
Args:
|
25 |
+
kernel_size: size of the kernel
|
26 |
+
device: target device for kernel generation
|
27 |
+
dtype: target data type for kernel generation
|
28 |
+
Returns:
|
29 |
+
kernel: Tensor with shape (1, kernel_size, kernel_size)
|
30 |
+
"""
|
31 |
+
# Take bigger window and drop borders
|
32 |
+
window = torch.hann_window(kernel_size + 2, periodic=False, device=device, dtype=dtype)[1:-1]
|
33 |
+
kernel = window[:, None] * window[None, :]
|
34 |
+
# Normalize and reshape kernel
|
35 |
+
return kernel.view(1, kernel_size, kernel_size) / kernel.sum()
|
36 |
+
|
37 |
+
|
38 |
+
def gaussian_filter(kernel_size: int, sigma: float, device: Optional[str] = None,
|
39 |
+
dtype: Optional[type] = None) -> torch.Tensor:
|
40 |
+
r"""Returns 2D Gaussian kernel N(0,`sigma`^2)
|
41 |
+
Args:
|
42 |
+
kernel_size: Size of the kernel
|
43 |
+
sigma: Std of the distribution
|
44 |
+
device: target device for kernel generation
|
45 |
+
dtype: target data type for kernel generation
|
46 |
+
Returns:
|
47 |
+
gaussian_kernel: Tensor with shape (1, kernel_size, kernel_size)
|
48 |
+
"""
|
49 |
+
coords = torch.arange(kernel_size, dtype=dtype, device=device)
|
50 |
+
coords -= (kernel_size - 1) / 2.
|
51 |
+
|
52 |
+
g = coords ** 2
|
53 |
+
g = (- (g.unsqueeze(0) + g.unsqueeze(1)) / (2 * sigma ** 2)).exp()
|
54 |
+
|
55 |
+
g /= g.sum()
|
56 |
+
return g.unsqueeze(0)
|
57 |
+
|
58 |
+
|
59 |
+
# Gradient operator kernels
|
60 |
+
def scharr_filter(device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
61 |
+
r"""Utility function that returns a normalized 3x3 Scharr kernel in X direction
|
62 |
+
|
63 |
+
Args:
|
64 |
+
device: target device for kernel generation
|
65 |
+
dtype: target data type for kernel generation
|
66 |
+
Returns:
|
67 |
+
kernel: Tensor with shape (1, 3, 3)
|
68 |
+
"""
|
69 |
+
return torch.tensor([[[-3., 0., 3.], [-10., 0., 10.], [-3., 0., 3.]]], device=device, dtype=dtype) / 16
|
70 |
+
|
71 |
+
|
72 |
+
def prewitt_filter(device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
73 |
+
r"""Utility function that returns a normalized 3x3 Prewitt kernel in X direction
|
74 |
+
|
75 |
+
Args:
|
76 |
+
device: target device for kernel generation
|
77 |
+
dtype: target data type for kernel generation
|
78 |
+
Returns:
|
79 |
+
kernel: Tensor with shape (1, 3, 3)"""
|
80 |
+
return torch.tensor([[[-1., 0., 1.], [-1., 0., 1.], [-1., 0., 1.]]], device=device, dtype=dtype) / 3
|
81 |
+
|
82 |
+
|
83 |
+
def binomial_filter1d(kernel_size: int, device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
84 |
+
r"""Creates 1D normalized binomial filter
|
85 |
+
|
86 |
+
Args:
|
87 |
+
kernel_size (int): kernel size
|
88 |
+
device: target device for kernel generation
|
89 |
+
dtype: target data type for kernel generation
|
90 |
+
|
91 |
+
Returns:
|
92 |
+
Binomial kernel with shape (1, 1, kernel_size)
|
93 |
+
"""
|
94 |
+
kernel = np.poly1d([0.5, 0.5]) ** (kernel_size - 1)
|
95 |
+
return torch.tensor(kernel.c, dtype=dtype, device=device).view(1, 1, kernel_size)
|
96 |
+
|
97 |
+
|
98 |
+
def average_filter2d(kernel_size: int, device: Optional[str] = None, dtype: Optional[type] = None) -> torch.Tensor:
|
99 |
+
r"""Creates 2D normalized average filter
|
100 |
+
|
101 |
+
Args:
|
102 |
+
kernel_size (int): kernel size
|
103 |
+
device: target device for kernel generation
|
104 |
+
dtype: target data type for kernel generation
|
105 |
+
|
106 |
+
Returns:
|
107 |
+
kernel: Tensor with shape (1, kernel_size, kernel_size)
|
108 |
+
"""
|
109 |
+
window = torch.ones(kernel_size, dtype=dtype, device=device) / kernel_size
|
110 |
+
kernel = window[:, None] * window[None, :]
|
111 |
+
return kernel.unsqueeze(0)
|
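A minimal sketch applying gaussian_filter as a per-channel blur (import path as in this commit; kernel size and sigma are illustrative). A floating-point dtype is passed explicitly because torch.arange would otherwise return an integer tensor that breaks the centring step.

import torch
import torch.nn.functional as F
from libs.metric.piq.functional import gaussian_filter

kernel = gaussian_filter(kernel_size=5, sigma=1.5, dtype=torch.float32)   # shape (1, 5, 5)
x = torch.rand(1, 3, 32, 32)
blurred = F.conv2d(x, kernel.repeat(3, 1, 1, 1), padding=2, groups=3)     # per-channel blur
print(blurred.shape)   # (1, 3, 32, 32)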
libs/metric/piq/functional/layers.py
ADDED
@@ -0,0 +1,33 @@
1 |
+
r"""Custom layers used in metrics computations"""
|
2 |
+
import torch
|
3 |
+
from typing import Optional
|
4 |
+
|
5 |
+
from .filters import hann_filter
|
6 |
+
|
7 |
+
|
8 |
+
class L2Pool2d(torch.nn.Module):
|
9 |
+
r"""Applies L2 pooling with Hann window of size 3x3
|
10 |
+
Args:
|
11 |
+
x: Tensor with shape (N, C, H, W)"""
|
12 |
+
EPS = 1e-12
|
13 |
+
|
14 |
+
def __init__(self, kernel_size: int = 3, stride: int = 2, padding=1) -> None:
|
15 |
+
super().__init__()
|
16 |
+
self.kernel_size = kernel_size
|
17 |
+
self.stride = stride
|
18 |
+
self.padding = padding
|
19 |
+
|
20 |
+
self.kernel: Optional[torch.Tensor] = None
|
21 |
+
|
22 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
23 |
+
if self.kernel is None:
|
24 |
+
C = x.size(1)
|
25 |
+
self.kernel = hann_filter(self.kernel_size).repeat((C, 1, 1, 1)).to(x)
|
26 |
+
|
27 |
+
out = torch.nn.functional.conv2d(
|
28 |
+
x ** 2, self.kernel,
|
29 |
+
stride=self.stride,
|
30 |
+
padding=self.padding,
|
31 |
+
groups=x.shape[1]
|
32 |
+
)
|
33 |
+
return (out + self.EPS).sqrt()
|
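A minimal sketch of L2Pool2d (import path as in this commit; the input size is illustrative):

import torch
from libs.metric.piq.functional import L2Pool2d

pool = L2Pool2d(kernel_size=3, stride=2, padding=1)
x = torch.rand(1, 3, 64, 64)
y = pool(x)            # (1, 3, 32, 32): sqrt of a Hann-weighted average of squared values
print(y.shape)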
libs/metric/piq/functional/resize.py
ADDED
@@ -0,0 +1,426 @@
1 |
+
"""
|
2 |
+
A standalone PyTorch implementation for fast and efficient bicubic resampling.
|
3 |
+
The resulting values are the same to MATLAB function imresize('bicubic').
|
4 |
+
## Author: Sanghyun Son
|
5 |
+
## Email: [email protected] (primary), [email protected] (secondary)
|
6 |
+
## Version: 1.2.0
|
7 |
+
## Last update: July 9th, 2020 (KST)
|
8 |
+
Dependency: torch
|
9 |
+
Example::
|
10 |
+
>>> import torch
|
11 |
+
>>> import core
|
12 |
+
>>> x = torch.arange(16).float().view(1, 1, 4, 4)
|
13 |
+
>>> y = core.imresize(x, sizes=(3, 3))
|
14 |
+
>>> print(y)
|
15 |
+
tensor([[[[ 0.7506, 2.1004, 3.4503],
|
16 |
+
[ 6.1505, 7.5000, 8.8499],
|
17 |
+
[11.5497, 12.8996, 14.2494]]]])
|
18 |
+
"""
|
19 |
+
|
20 |
+
import math
|
21 |
+
import typing
|
22 |
+
|
23 |
+
import torch
|
24 |
+
from torch.nn import functional as F
|
25 |
+
|
26 |
+
__all__ = ['imresize']
|
27 |
+
|
28 |
+
_I = typing.Optional[int]
|
29 |
+
_D = typing.Optional[torch.dtype]
|
30 |
+
|
31 |
+
|
32 |
+
def nearest_contribution(x: torch.Tensor) -> torch.Tensor:
|
33 |
+
range_around_0 = torch.logical_and(x.gt(-0.5), x.le(0.5))
|
34 |
+
cont = range_around_0.to(dtype=x.dtype)
|
35 |
+
return cont
|
36 |
+
|
37 |
+
|
38 |
+
def linear_contribution(x: torch.Tensor) -> torch.Tensor:
|
39 |
+
ax = x.abs()
|
40 |
+
range_01 = ax.le(1)
|
41 |
+
cont = (1 - ax) * range_01.to(dtype=x.dtype)
|
42 |
+
return cont
|
43 |
+
|
44 |
+
|
45 |
+
def cubic_contribution(x: torch.Tensor, a: float = -0.5) -> torch.Tensor:
|
46 |
+
ax = x.abs()
|
47 |
+
ax2 = ax * ax
|
48 |
+
ax3 = ax * ax2
|
49 |
+
|
50 |
+
range_01 = ax.le(1)
|
51 |
+
range_12 = torch.logical_and(ax.gt(1), ax.le(2))
|
52 |
+
|
53 |
+
cont_01 = (a + 2) * ax3 - (a + 3) * ax2 + 1
|
54 |
+
cont_01 = cont_01 * range_01.to(dtype=x.dtype)
|
55 |
+
|
56 |
+
cont_12 = (a * ax3) - (5 * a * ax2) + (8 * a * ax) - (4 * a)
|
57 |
+
cont_12 = cont_12 * range_12.to(dtype=x.dtype)
|
58 |
+
|
59 |
+
cont = cont_01 + cont_12
|
60 |
+
return cont
|
61 |
+
|
62 |
+
|
63 |
+
def gaussian_contribution(x: torch.Tensor, sigma: float = 2.0) -> torch.Tensor:
|
64 |
+
range_3sigma = (x.abs() <= 3 * sigma + 1)
|
65 |
+
# Normalization will be done after
|
66 |
+
cont = torch.exp(-x.pow(2) / (2 * sigma ** 2))
|
67 |
+
cont = cont * range_3sigma.to(dtype=x.dtype)
|
68 |
+
return cont
|
69 |
+
|
70 |
+
|
71 |
+
def discrete_kernel(
|
72 |
+
kernel: str, scale: float, antialiasing: bool = True) -> torch.Tensor:
|
73 |
+
'''
|
74 |
+
For downsampling with integer scale only.
|
75 |
+
'''
|
76 |
+
downsampling_factor = int(1 / scale)
|
77 |
+
if kernel == 'cubic':
|
78 |
+
kernel_size_orig = 4
|
79 |
+
else:
|
80 |
+
raise ValueError('Only the cubic kernel is supported by discrete_kernel!')
|
81 |
+
|
82 |
+
if antialiasing:
|
83 |
+
kernel_size = kernel_size_orig * downsampling_factor
|
84 |
+
else:
|
85 |
+
kernel_size = kernel_size_orig
|
86 |
+
|
87 |
+
if downsampling_factor % 2 == 0:
|
88 |
+
a = kernel_size_orig * (0.5 - 1 / (2 * kernel_size))
|
89 |
+
else:
|
90 |
+
kernel_size -= 1
|
91 |
+
a = kernel_size_orig * (0.5 - 1 / (kernel_size + 1))
|
92 |
+
|
93 |
+
with torch.no_grad():
|
94 |
+
r = torch.linspace(-a, a, steps=kernel_size)
|
95 |
+
k = cubic_contribution(r).view(-1, 1)
|
96 |
+
k = torch.matmul(k, k.t())
|
97 |
+
k /= k.sum()
|
98 |
+
|
99 |
+
return k
|
100 |
+
|
101 |
+
|
102 |
+
def reflect_padding(
|
103 |
+
x: torch.Tensor,
|
104 |
+
dim: int,
|
105 |
+
pad_pre: int,
|
106 |
+
pad_post: int) -> torch.Tensor:
|
107 |
+
'''
|
108 |
+
Apply reflect padding to the given Tensor.
|
109 |
+
Note that it is slightly different from the PyTorch functional.pad,
|
110 |
+
where boundary elements are used only once.
|
111 |
+
Instead, we follow the MATLAB implementation
|
112 |
+
which uses boundary elements twice.
|
113 |
+
For example,
|
114 |
+
[a, b, c, d] would become [b, a, b, c, d, c] with the PyTorch implementation,
|
115 |
+
while our implementation yields [a, a, b, c, d, d].
|
116 |
+
'''
|
117 |
+
b, c, h, w = x.size()
|
118 |
+
if dim == 2 or dim == -2:
|
119 |
+
padding_buffer = x.new_zeros(b, c, h + pad_pre + pad_post, w)
|
120 |
+
padding_buffer[..., pad_pre:(h + pad_pre), :].copy_(x)
|
121 |
+
for p in range(pad_pre):
|
122 |
+
padding_buffer[..., pad_pre - p - 1, :].copy_(x[..., p, :])
|
123 |
+
for p in range(pad_post):
|
124 |
+
padding_buffer[..., h + pad_pre + p, :].copy_(x[..., -(p + 1), :])
|
125 |
+
else:
|
126 |
+
padding_buffer = x.new_zeros(b, c, h, w + pad_pre + pad_post)
|
127 |
+
padding_buffer[..., pad_pre:(w + pad_pre)].copy_(x)
|
128 |
+
for p in range(pad_pre):
|
129 |
+
padding_buffer[..., pad_pre - p - 1].copy_(x[..., p])
|
130 |
+
for p in range(pad_post):
|
131 |
+
padding_buffer[..., w + pad_pre + p].copy_(x[..., -(p + 1)])
|
132 |
+
|
133 |
+
return padding_buffer
|
134 |
+
|
135 |
+
|
136 |
+
def padding(
|
137 |
+
x: torch.Tensor,
|
138 |
+
dim: int,
|
139 |
+
pad_pre: int,
|
140 |
+
pad_post: int,
|
141 |
+
padding_type: typing.Optional[str] = 'reflect') -> torch.Tensor:
|
142 |
+
if padding_type is None:
|
143 |
+
return x
|
144 |
+
elif padding_type == 'reflect':
|
145 |
+
x_pad = reflect_padding(x, dim, pad_pre, pad_post)
|
146 |
+
else:
|
147 |
+
raise ValueError('{} padding is not supported!'.format(padding_type))
|
148 |
+
|
149 |
+
return x_pad
|
150 |
+
|
151 |
+
|
152 |
+
def get_padding(
|
153 |
+
base: torch.Tensor,
|
154 |
+
kernel_size: int,
|
155 |
+
x_size: int) -> typing.Tuple[int, int, torch.Tensor]:
|
156 |
+
base = base.long()
|
157 |
+
r_min = base.min()
|
158 |
+
r_max = base.max() + kernel_size - 1
|
159 |
+
|
160 |
+
if r_min <= 0:
|
161 |
+
pad_pre = -r_min
|
162 |
+
pad_pre = pad_pre.item()
|
163 |
+
base += pad_pre
|
164 |
+
else:
|
165 |
+
pad_pre = 0
|
166 |
+
|
167 |
+
if r_max >= x_size:
|
168 |
+
pad_post = r_max - x_size + 1
|
169 |
+
pad_post = pad_post.item()
|
170 |
+
else:
|
171 |
+
pad_post = 0
|
172 |
+
|
173 |
+
return pad_pre, pad_post, base
|
174 |
+
|
175 |
+
|
176 |
+
def get_weight(
|
177 |
+
dist: torch.Tensor,
|
178 |
+
kernel_size: int,
|
179 |
+
kernel: str = 'cubic',
|
180 |
+
sigma: float = 2.0,
|
181 |
+
antialiasing_factor: float = 1) -> torch.Tensor:
|
182 |
+
buffer_pos = dist.new_zeros(kernel_size, len(dist))
|
183 |
+
for idx, buffer_sub in enumerate(buffer_pos):
|
184 |
+
buffer_sub.copy_(dist - idx)
|
185 |
+
|
186 |
+
# Expand (downsampling) / Shrink (upsampling) the receptive field.
|
187 |
+
buffer_pos *= antialiasing_factor
|
188 |
+
if kernel == 'cubic':
|
189 |
+
weight = cubic_contribution(buffer_pos)
|
190 |
+
elif kernel == 'gaussian':
|
191 |
+
weight = gaussian_contribution(buffer_pos, sigma=sigma)
|
192 |
+
else:
|
193 |
+
raise ValueError('{} kernel is not supported!'.format(kernel))
|
194 |
+
|
195 |
+
weight /= weight.sum(dim=0, keepdim=True)
|
196 |
+
return weight
|
197 |
+
|
198 |
+
|
199 |
+
def reshape_tensor(x: torch.Tensor, dim: int, kernel_size: int) -> torch.Tensor:
|
200 |
+
# Resize height
|
201 |
+
if dim == 2 or dim == -2:
|
202 |
+
k = (kernel_size, 1)
|
203 |
+
h_out = x.size(-2) - kernel_size + 1
|
204 |
+
w_out = x.size(-1)
|
205 |
+
# Resize width
|
206 |
+
else:
|
207 |
+
k = (1, kernel_size)
|
208 |
+
h_out = x.size(-2)
|
209 |
+
w_out = x.size(-1) - kernel_size + 1
|
210 |
+
|
211 |
+
unfold = F.unfold(x, k)
|
212 |
+
unfold = unfold.view(unfold.size(0), -1, h_out, w_out)
|
213 |
+
return unfold
|
214 |
+
|
215 |
+
|
216 |
+
def reshape_input(x: torch.Tensor) -> typing.Tuple[torch.Tensor, _I, _I, int, int]:
|
217 |
+
if x.dim() == 4:
|
218 |
+
b, c, h, w = x.size()
|
219 |
+
elif x.dim() == 3:
|
220 |
+
c, h, w = x.size()
|
221 |
+
b = None
|
222 |
+
elif x.dim() == 2:
|
223 |
+
h, w = x.size()
|
224 |
+
b = c = None
|
225 |
+
else:
|
226 |
+
raise ValueError('{}-dim Tensor is not supported!'.format(x.dim()))
|
227 |
+
|
228 |
+
x = x.view(-1, 1, h, w)
|
229 |
+
return x, b, c, h, w
|
230 |
+
|
231 |
+
|
232 |
+
def reshape_output(x: torch.Tensor, b: _I, c: _I) -> torch.Tensor:
|
233 |
+
rh = x.size(-2)
|
234 |
+
rw = x.size(-1)
|
235 |
+
# Back to the original dimension
|
236 |
+
if b is not None:
|
237 |
+
x = x.view(b, c, rh, rw) # 4-dim
|
238 |
+
else:
|
239 |
+
if c is not None:
|
240 |
+
x = x.view(c, rh, rw) # 3-dim
|
241 |
+
else:
|
242 |
+
x = x.view(rh, rw) # 2-dim
|
243 |
+
|
244 |
+
return x
|
245 |
+
|
246 |
+
|
247 |
+
def cast_input(x: torch.Tensor) -> typing.Tuple[torch.Tensor, _D]:
|
248 |
+
if x.dtype != torch.float32 and x.dtype != torch.float64:
|
249 |
+
dtype = x.dtype
|
250 |
+
x = x.float()
|
251 |
+
else:
|
252 |
+
dtype = None
|
253 |
+
|
254 |
+
return x, dtype
|
255 |
+
|
256 |
+
|
257 |
+
def cast_output(x: torch.Tensor, dtype: _D) -> torch.Tensor:
|
258 |
+
if dtype is not None:
|
259 |
+
if not dtype.is_floating_point:
|
260 |
+
x = x.round()
|
261 |
+
# To prevent over/underflow when converting types
|
262 |
+
if dtype is torch.uint8:
|
263 |
+
x = x.clamp(0, 255)
|
264 |
+
|
265 |
+
x = x.to(dtype=dtype)
|
266 |
+
|
267 |
+
return x
|
268 |
+
|
269 |
+
|
270 |
+
def resize_1d(
|
271 |
+
x: torch.Tensor,
|
272 |
+
dim: int,
|
273 |
+
size: int,
|
274 |
+
scale: float,
|
275 |
+
kernel: str = 'cubic',
|
276 |
+
sigma: float = 2.0,
|
277 |
+
padding_type: str = 'reflect',
|
278 |
+
antialiasing: bool = True) -> torch.Tensor:
|
279 |
+
'''
|
280 |
+
Args:
|
281 |
+
x (torch.Tensor): A torch.Tensor of dimension (B x C, 1, H, W).
|
282 |
+
dim (int):
|
283 |
+
scale (float):
|
284 |
+
size (int):
|
285 |
+
Return:
|
286 |
+
'''
|
287 |
+
# Identity case
|
288 |
+
if scale == 1:
|
289 |
+
return x
|
290 |
+
|
291 |
+
# Default bicubic kernel with antialiasing (only when downsampling)
|
292 |
+
if kernel == 'cubic':
|
293 |
+
kernel_size = 4
|
294 |
+
else:
|
295 |
+
kernel_size = math.floor(6 * sigma)
|
296 |
+
|
297 |
+
if antialiasing and (scale < 1):
|
298 |
+
antialiasing_factor = scale
|
299 |
+
kernel_size = math.ceil(kernel_size / antialiasing_factor)
|
300 |
+
else:
|
301 |
+
antialiasing_factor = 1
|
302 |
+
|
303 |
+
# We allow a margin on both sides
|
304 |
+
kernel_size += 2
|
305 |
+
|
306 |
+
# Weights only depend on the shape of input and output,
|
307 |
+
# so we do not calculate gradients here.
|
308 |
+
with torch.no_grad():
|
309 |
+
pos = torch.linspace(
|
310 |
+
0, size - 1, steps=size, dtype=x.dtype, device=x.device,
|
311 |
+
)
|
312 |
+
pos = (pos + 0.5) / scale - 0.5
|
313 |
+
base = pos.floor() - (kernel_size // 2) + 1
|
314 |
+
dist = pos - base
|
315 |
+
weight = get_weight(
|
316 |
+
dist,
|
317 |
+
kernel_size,
|
318 |
+
kernel=kernel,
|
319 |
+
sigma=sigma,
|
320 |
+
antialiasing_factor=antialiasing_factor,
|
321 |
+
)
|
322 |
+
pad_pre, pad_post, base = get_padding(base, kernel_size, x.size(dim))
|
323 |
+
|
324 |
+
# To backpropagate through x
|
325 |
+
x_pad = padding(x, dim, pad_pre, pad_post, padding_type=padding_type)
|
326 |
+
unfold = reshape_tensor(x_pad, dim, kernel_size)
|
327 |
+
# Subsampling first
|
328 |
+
if dim == 2 or dim == -2:
|
329 |
+
sample = unfold[..., base, :]
|
330 |
+
weight = weight.view(1, kernel_size, sample.size(2), 1)
|
331 |
+
else:
|
332 |
+
sample = unfold[..., base]
|
333 |
+
weight = weight.view(1, kernel_size, 1, sample.size(3))
|
334 |
+
|
335 |
+
# Apply the kernel
|
336 |
+
x = sample * weight
|
337 |
+
x = x.sum(dim=1, keepdim=True)
|
338 |
+
return x
|
339 |
+
|
340 |
+
|
341 |
+
def downsampling_2d(
|
342 |
+
x: torch.Tensor,
|
343 |
+
k: torch.Tensor,
|
344 |
+
scale: int,
|
345 |
+
padding_type: str = 'reflect') -> torch.Tensor:
|
346 |
+
c = x.size(1)
|
347 |
+
k_h = k.size(-2)
|
348 |
+
k_w = k.size(-1)
|
349 |
+
|
350 |
+
k = k.to(dtype=x.dtype, device=x.device)
|
351 |
+
k = k.view(1, 1, k_h, k_w)
|
352 |
+
k = k.repeat(c, c, 1, 1)
|
353 |
+
e = torch.eye(c, dtype=k.dtype, device=k.device, requires_grad=False)
|
354 |
+
e = e.view(c, c, 1, 1)
|
355 |
+
k = k * e
|
356 |
+
|
357 |
+
pad_h = (k_h - scale) // 2
|
358 |
+
pad_w = (k_w - scale) // 2
|
359 |
+
x = padding(x, -2, pad_h, pad_h, padding_type=padding_type)
|
360 |
+
x = padding(x, -1, pad_w, pad_w, padding_type=padding_type)
|
361 |
+
y = F.conv2d(x, k, padding=0, stride=scale)
|
362 |
+
return y
|
363 |
+
|
364 |
+
|
365 |
+
def imresize(
|
366 |
+
x: torch.Tensor,
|
367 |
+
scale: typing.Optional[float] = None,
|
368 |
+
sizes: typing.Optional[typing.Tuple[int, int]] = None,
|
369 |
+
kernel: typing.Union[str, torch.Tensor] = 'cubic',
|
370 |
+
sigma: float = 2,
|
371 |
+
rotation_degree: float = 0,
|
372 |
+
padding_type: str = 'reflect',
|
373 |
+
antialiasing: bool = True) -> torch.Tensor:
|
374 |
+
"""
|
375 |
+
Args:
|
376 |
+
x (torch.Tensor):
|
377 |
+
scale (float):
|
378 |
+
sizes (tuple(int, int)):
|
379 |
+
kernel (str, default='cubic'):
|
380 |
+
sigma (float, default=2):
|
381 |
+
rotation_degree (float, default=0):
|
382 |
+
padding_type (str, default='reflect'):
|
383 |
+
antialiasing (bool, default=True):
|
384 |
+
Return:
|
385 |
+
torch.Tensor:
|
386 |
+
"""
|
387 |
+
if scale is None and sizes is None:
|
388 |
+
raise ValueError('One of scale or sizes must be specified!')
|
389 |
+
if scale is not None and sizes is not None:
|
390 |
+
raise ValueError('Please specify scale or sizes to avoid conflict!')
|
391 |
+
|
392 |
+
x, b, c, h, w = reshape_input(x)
|
393 |
+
|
394 |
+
if sizes is None and scale is not None:
|
395 |
+
'''
|
396 |
+
# Check if we can apply the convolution algorithm
|
397 |
+
scale_inv = 1 / scale
|
398 |
+
if isinstance(kernel, str) and scale_inv.is_integer():
|
399 |
+
kernel = discrete_kernel(kernel, scale, antialiasing=antialiasing)
|
400 |
+
elif isinstance(kernel, torch.Tensor) and not scale_inv.is_integer():
|
401 |
+
raise ValueError(
|
402 |
+
'An integer downsampling factor '
|
403 |
+
'should be used with a predefined kernel!'
|
404 |
+
)
|
405 |
+
'''
|
406 |
+
# Determine output size
|
407 |
+
sizes = (math.ceil(h * scale), math.ceil(w * scale))
|
408 |
+
scales = (scale, scale)
|
409 |
+
|
410 |
+
if scale is None and sizes is not None:
|
411 |
+
scales = (sizes[0] / h, sizes[1] / w)
|
412 |
+
|
413 |
+
x, dtype = cast_input(x)
|
414 |
+
|
415 |
+
if isinstance(kernel, str) and sizes is not None:
|
416 |
+
# Core resizing module
|
417 |
+
x = resize_1d(x, -2, size=sizes[0], scale=scales[0], kernel=kernel, sigma=sigma, padding_type=padding_type,
|
418 |
+
antialiasing=antialiasing)
|
419 |
+
x = resize_1d(x, -1, size=sizes[1], scale=scales[1], kernel=kernel, sigma=sigma, padding_type=padding_type,
|
420 |
+
antialiasing=antialiasing)
|
421 |
+
elif isinstance(kernel, torch.Tensor) and scale is not None:
|
422 |
+
x = downsampling_2d(x, kernel, scale=int(1 / scale))
|
423 |
+
|
424 |
+
x = reshape_output(x, b, c)
|
425 |
+
x = cast_output(x, dtype)
|
426 |
+
return x
|
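A minimal sketch of imresize from the module above (import path as in this commit; sizes are illustrative):

import torch
from libs.metric.piq.functional import imresize

x = torch.rand(1, 3, 128, 128)
y_half = imresize(x, scale=0.5)           # antialiased bicubic downsampling -> (1, 3, 64, 64)
y_hw = imresize(x, sizes=(100, 150))      # resize to an explicit (H, W) -> (1, 3, 100, 150)
print(y_half.shape, y_hw.shape)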
libs/metric/piq/perceptual.py
ADDED
@@ -0,0 +1,496 @@
1 |
+
"""
|
2 |
+
Implementation of Content loss, Style loss, LPIPS and DISTS metrics
|
3 |
+
References:
|
4 |
+
.. [1] Gatys, Leon and Ecker, Alexander and Bethge, Matthias
|
5 |
+
(2016). A Neural Algorithm of Artistic Style
|
6 |
+
Association for Research in Vision and Ophthalmology (ARVO)
|
7 |
+
https://arxiv.org/abs/1508.06576
|
8 |
+
.. [2] Zhang, Richard and Isola, Phillip and Efros, et al.
|
9 |
+
(2018) The Unreasonable Effectiveness of Deep Features as a Perceptual Metric
|
10 |
+
2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition
|
11 |
+
https://arxiv.org/abs/1801.03924
|
12 |
+
"""
|
13 |
+
from typing import List, Union, Collection
|
14 |
+
|
15 |
+
import torch
|
16 |
+
import torch.nn as nn
|
17 |
+
from torch.nn.modules.loss import _Loss
|
18 |
+
from torchvision.models import vgg16, vgg19, VGG16_Weights, VGG19_Weights
|
19 |
+
|
20 |
+
from .utils import _validate_input, _reduce
|
21 |
+
from .functional import similarity_map, L2Pool2d
|
22 |
+
|
23 |
+
# Map VGG names to corresponding number in torchvision layer
|
24 |
+
VGG16_LAYERS = {
|
25 |
+
"conv1_1": '0', "relu1_1": '1',
|
26 |
+
"conv1_2": '2', "relu1_2": '3',
|
27 |
+
"pool1": '4',
|
28 |
+
"conv2_1": '5', "relu2_1": '6',
|
29 |
+
"conv2_2": '7', "relu2_2": '8',
|
30 |
+
"pool2": '9',
|
31 |
+
"conv3_1": '10', "relu3_1": '11',
|
32 |
+
"conv3_2": '12', "relu3_2": '13',
|
33 |
+
"conv3_3": '14', "relu3_3": '15',
|
34 |
+
"pool3": '16',
|
35 |
+
"conv4_1": '17', "relu4_1": '18',
|
36 |
+
"conv4_2": '19', "relu4_2": '20',
|
37 |
+
"conv4_3": '21', "relu4_3": '22',
|
38 |
+
"pool4": '23',
|
39 |
+
"conv5_1": '24', "relu5_1": '25',
|
40 |
+
"conv5_2": '26', "relu5_2": '27',
|
41 |
+
"conv5_3": '28', "relu5_3": '29',
|
42 |
+
"pool5": '30',
|
43 |
+
}
|
44 |
+
|
45 |
+
VGG19_LAYERS = {
|
46 |
+
"conv1_1": '0', "relu1_1": '1',
|
47 |
+
"conv1_2": '2', "relu1_2": '3',
|
48 |
+
"pool1": '4',
|
49 |
+
"conv2_1": '5', "relu2_1": '6',
|
50 |
+
"conv2_2": '7', "relu2_2": '8',
|
51 |
+
"pool2": '9',
|
52 |
+
"conv3_1": '10', "relu3_1": '11',
|
53 |
+
"conv3_2": '12', "relu3_2": '13',
|
54 |
+
"conv3_3": '14', "relu3_3": '15',
|
55 |
+
"conv3_4": '16', "relu3_4": '17',
|
56 |
+
"pool3": '18',
|
57 |
+
"conv4_1": '19', "relu4_1": '20',
|
58 |
+
"conv4_2": '21', "relu4_2": '22',
|
59 |
+
"conv4_3": '23', "relu4_3": '24',
|
60 |
+
"conv4_4": '25', "relu4_4": '26',
|
61 |
+
"pool4": '27',
|
62 |
+
"conv5_1": '28', "relu5_1": '29',
|
63 |
+
"conv5_2": '30', "relu5_2": '31',
|
64 |
+
"conv5_3": '32', "relu5_3": '33',
|
65 |
+
"conv5_4": '34', "relu5_4": '35',
|
66 |
+
"pool5": '36',
|
67 |
+
}
|
68 |
+
|
69 |
+
IMAGENET_MEAN = [0.485, 0.456, 0.406]
|
70 |
+
IMAGENET_STD = [0.229, 0.224, 0.225]
|
71 |
+
|
72 |
+
# Constant used in feature normalization to avoid zero division
|
73 |
+
EPS = 1e-10
|
74 |
+
|
75 |
+
|
76 |
+
class ContentLoss(_Loss):
|
77 |
+
r"""Creates Content loss that can be used for image style transfer or as a measure for image to image tasks.
|
78 |
+
Uses pretrained VGG models from torchvision.
|
79 |
+
Expects input to be in range [0, 1] or normalized with ImageNet statistics into range [-1, 1]
|
80 |
+
|
81 |
+
Args:
|
82 |
+
feature_extractor: Model to extract features or model name: ``'vgg16'`` | ``'vgg19'``.
|
83 |
+
layers: List of strings with layer names. Default: ``'relu3_3'``
|
84 |
+
weights: List of float weights to balance different layers
|
85 |
+
replace_pooling: Flag to replace MaxPooling layer with AveragePooling. See references for details.
|
86 |
+
distance: Method to compute distance between features: ``'mse'`` | ``'mae'``.
|
87 |
+
reduction: Specifies the reduction type:
|
88 |
+
``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
|
89 |
+
mean: List of float values used for data standardization. Default: ImageNet mean.
|
90 |
+
If there is no need to normalize data, use [0., 0., 0.].
|
91 |
+
std: List of float values used for data standardization. Default: ImageNet std.
|
92 |
+
If there is no need to normalize data, use [1., 1., 1.].
|
93 |
+
normalize_features: If true, unit-normalize each feature in channel dimension before scaling
|
94 |
+
and computing distance. See references for details.
|
95 |
+
|
96 |
+
Examples:
|
97 |
+
>>> loss = ContentLoss()
|
98 |
+
>>> x = torch.rand(3, 3, 256, 256, requires_grad=True)
|
99 |
+
>>> y = torch.rand(3, 3, 256, 256)
|
100 |
+
>>> output = loss(x, y)
|
101 |
+
>>> output.backward()
|
102 |
+
|
103 |
+
References:
|
104 |
+
Gatys, Leon and Ecker, Alexander and Bethge, Matthias (2016).
|
105 |
+
A Neural Algorithm of Artistic Style
|
106 |
+
Association for Research in Vision and Ophthalmology (ARVO)
|
107 |
+
https://arxiv.org/abs/1508.06576
|
108 |
+
|
109 |
+
Zhang, Richard and Isola, Phillip and Efros, et al. (2018)
|
110 |
+
The Unreasonable Effectiveness of Deep Features as a Perceptual Metric
|
111 |
+
IEEE/CVF Conference on Computer Vision and Pattern Recognition
|
112 |
+
https://arxiv.org/abs/1801.03924
|
113 |
+
"""
|
114 |
+
|
115 |
+
def __init__(self, feature_extractor: Union[str, torch.nn.Module] = "vgg16", layers: Collection[str] = ("relu3_3",),
|
116 |
+
weights: List[Union[float, torch.Tensor]] = [1.], replace_pooling: bool = False,
|
117 |
+
distance: str = "mse", reduction: str = "mean", mean: List[float] = IMAGENET_MEAN,
|
118 |
+
std: List[float] = IMAGENET_STD, normalize_features: bool = False,
|
119 |
+
allow_layers_weights_mismatch: bool = False) -> None:
|
120 |
+
|
121 |
+
assert allow_layers_weights_mismatch or len(layers) == len(weights), \
|
122 |
+
f'Lengths of provided layers and weights mismatch ({len(weights)} weights and {len(layers)} layers), ' \
|
123 |
+
f'which will cause incorrect results. Please provide weight for each layer.'
|
124 |
+
|
125 |
+
super().__init__()
|
126 |
+
|
127 |
+
if callable(feature_extractor):
|
128 |
+
self.model = feature_extractor
|
129 |
+
self.layers = layers
|
130 |
+
else:
|
131 |
+
if feature_extractor == "vgg16":
|
132 |
+
# self.model = vgg16(pretrained=True, progress=False).features
|
133 |
+
self.model = vgg16(weights=VGG16_Weights.DEFAULT, progress=False).features
|
134 |
+
self.layers = [VGG16_LAYERS[l] for l in layers]
|
135 |
+
elif feature_extractor == "vgg19":
|
136 |
+
# self.model = vgg19(pretrained=True, progress=False).features
|
137 |
+
self.model = vgg19(weights=VGG19_Weights.DEFAULT, progress=False).features
|
138 |
+
self.layers = [VGG19_LAYERS[l] for l in layers]
|
139 |
+
else:
|
140 |
+
raise ValueError("Unknown feature extractor")
|
141 |
+
|
142 |
+
if replace_pooling:
|
143 |
+
self.model = self.replace_pooling(self.model)
|
144 |
+
|
145 |
+
# Disable gradients
|
146 |
+
for param in self.model.parameters():
|
147 |
+
param.requires_grad_(False)
|
148 |
+
|
149 |
+
self.distance = {
|
150 |
+
"mse": nn.MSELoss,
|
151 |
+
"mae": nn.L1Loss,
|
152 |
+
}[distance](reduction='none')
|
153 |
+
|
154 |
+
self.weights = [torch.tensor(w) if not isinstance(w, torch.Tensor) else w for w in weights]
|
155 |
+
|
156 |
+
mean = torch.tensor(mean)
|
157 |
+
std = torch.tensor(std)
|
158 |
+
self.mean = mean.view(1, -1, 1, 1)
|
159 |
+
self.std = std.view(1, -1, 1, 1)
|
160 |
+
|
161 |
+
self.normalize_features = normalize_features
|
162 |
+
self.reduction = reduction
|
163 |
+
|
164 |
+
def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
|
165 |
+
r"""Computation of Content loss between feature representations of prediction :math:`x` and
|
166 |
+
target :math:`y` tensors.
|
167 |
+
|
168 |
+
Args:
|
169 |
+
x: An input tensor. Shape :math:`(N, C, H, W)`.
|
170 |
+
y: A target tensor. Shape :math:`(N, C, H, W)`.
|
171 |
+
|
172 |
+
Returns:
|
173 |
+
Content loss between feature representations
|
174 |
+
"""
|
175 |
+
_validate_input([x, y], dim_range=(4, 4), data_range=(0, -1))
|
176 |
+
|
177 |
+
self.model.to(x)
|
178 |
+
x_features = self.get_features(x)
|
179 |
+
y_features = self.get_features(y)
|
180 |
+
|
181 |
+
distances = self.compute_distance(x_features, y_features)
|
182 |
+
|
183 |
+
# Scale distances, then average in spatial dimensions, then stack and sum in channels dimension
|
184 |
+
loss = torch.cat([(d * w.to(d)).mean(dim=[2, 3]) for d, w in zip(distances, self.weights)], dim=1).sum(dim=1)
|
185 |
+
|
186 |
+
return _reduce(loss, self.reduction)
|
187 |
+
|
188 |
+
def compute_distance(self, x_features: List[torch.Tensor], y_features: List[torch.Tensor]) -> List[torch.Tensor]:
|
189 |
+
r"""Take L2 or L1 distance between feature maps depending on ``distance``.
|
190 |
+
|
191 |
+
Args:
|
192 |
+
x_features: Features of the input tensor.
|
193 |
+
y_features: Features of the target tensor.
|
194 |
+
|
195 |
+
Returns:
|
196 |
+
Distance between feature maps
|
197 |
+
"""
|
198 |
+
return [self.distance(x, y) for x, y in zip(x_features, y_features)]
|
199 |
+
|
200 |
+
def get_features(self, x: torch.Tensor) -> List[torch.Tensor]:
|
201 |
+
r"""
|
202 |
+
Args:
|
203 |
+
x: Tensor. Shape :math:`(N, C, H, W)`.
|
204 |
+
|
205 |
+
Returns:
|
206 |
+
List of features extracted from intermediate layers
|
207 |
+
"""
|
208 |
+
# Normalize input
|
209 |
+
x = (x - self.mean.to(x)) / self.std.to(x)
|
210 |
+
|
211 |
+
features = []
|
212 |
+
for name, module in self.model._modules.items():
|
213 |
+
x = module(x)
|
214 |
+
if name in self.layers:
|
215 |
+
features.append(self.normalize(x) if self.normalize_features else x)
|
216 |
+
|
217 |
+
return features
|
218 |
+
|
219 |
+
@staticmethod
|
220 |
+
def normalize(x: torch.Tensor) -> torch.Tensor:
|
221 |
+
r"""Normalize feature maps in channel direction to unit length.
|
222 |
+
|
223 |
+
Args:
|
224 |
+
x: Tensor. Shape :math:`(N, C, H, W)`.
|
225 |
+
|
226 |
+
Returns:
|
227 |
+
Normalized input
|
228 |
+
"""
|
229 |
+
norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
|
230 |
+
return x / (norm_factor + EPS)
|
231 |
+
|
232 |
+
def replace_pooling(self, module: torch.nn.Module) -> torch.nn.Module:
|
233 |
+
r"""Turn All MaxPool layers into AveragePool
|
234 |
+
|
235 |
+
Args:
|
236 |
+
module: Module to change MaxPool int AveragePool
|
237 |
+
|
238 |
+
Returns:
|
239 |
+
Module with AveragePool instead MaxPool
|
240 |
+
|
241 |
+
"""
|
242 |
+
module_output = module
|
243 |
+
if isinstance(module, torch.nn.MaxPool2d):
|
244 |
+
module_output = torch.nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
|
245 |
+
|
246 |
+
for name, child in module.named_children():
|
247 |
+
module_output.add_module(name, self.replace_pooling(child))
|
248 |
+
return module_output
|
249 |
+
|
250 |
+
|
251 |
+
class StyleLoss(ContentLoss):
|
252 |
+
r"""Creates Style loss that can be used for image style transfer or as a measure in
|
253 |
+
image to image tasks. Computes distance between Gram matrices of feature maps.
|
254 |
+
Uses pretrained VGG models from torchvision.
|
255 |
+
|
256 |
+
By default expects input to be in range [0, 1], which is then normalized by ImageNet statistics into range [-1, 1].
|
257 |
+
If no normalisation is required, change `mean` and `std` values accordingly.
|
258 |
+
|
259 |
+
Args:
|
260 |
+
feature_extractor: Model to extract features or model name: ``'vgg16'`` | ``'vgg19'``.
|
261 |
+
layers: List of strings with layer names. Default: ``'relu3_3'``
|
262 |
+
weights: List of float weight to balance different layers
|
263 |
+
replace_pooling: Flag to replace MaxPooling layer with AveragePooling. See references for details.
|
264 |
+
distance: Method to compute distance between features: ``'mse'`` | ``'mae'``.
|
265 |
+
reduction: Specifies the reduction type:
|
266 |
+
``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
|
267 |
+
mean: List of float values used for data standardization. Default: ImageNet mean.
|
268 |
+
If there is no need to normalize data, use [0., 0., 0.].
|
269 |
+
std: List of float values used for data standardization. Default: ImageNet std.
|
270 |
+
If there is no need to normalize data, use [1., 1., 1.].
|
271 |
+
normalize_features: If true, unit-normalize each feature in channel dimension before scaling
|
272 |
+
and computing distance. See references for details.
|
273 |
+
|
274 |
+
Examples:
|
275 |
+
>>> loss = StyleLoss()
|
276 |
+
>>> x = torch.rand(3, 3, 256, 256, requires_grad=True)
|
277 |
+
>>> y = torch.rand(3, 3, 256, 256)
|
278 |
+
>>> output = loss(x, y)
|
279 |
+
>>> output.backward()
|
280 |
+
|
281 |
+
References:
|
282 |
+
Gatys, Leon and Ecker, Alexander and Bethge, Matthias (2016).
|
283 |
+
A Neural Algorithm of Artistic Style
|
284 |
+
Association for Research in Vision and Ophthalmology (ARVO)
|
285 |
+
https://arxiv.org/abs/1508.06576
|
286 |
+
|
287 |
+
Zhang, Richard and Isola, Phillip and Efros, et al. (2018)
|
288 |
+
The Unreasonable Effectiveness of Deep Features as a Perceptual Metric
|
289 |
+
IEEE/CVF Conference on Computer Vision and Pattern Recognition
|
290 |
+
https://arxiv.org/abs/1801.03924
|
291 |
+
"""
|
292 |
+
|
293 |
+
def compute_distance(self, x_features: torch.Tensor, y_features: torch.Tensor):
|
294 |
+
r"""Take L2 or L1 distance between Gram matrices of feature maps depending on ``distance``.
|
295 |
+
|
296 |
+
Args:
|
297 |
+
x_features: Features of the input tensor.
|
298 |
+
y_features: Features of the target tensor.
|
299 |
+
|
300 |
+
Returns:
|
301 |
+
Distance between Gram matrices
|
302 |
+
"""
|
303 |
+
x_gram = [self.gram_matrix(x) for x in x_features]
|
304 |
+
y_gram = [self.gram_matrix(x) for x in y_features]
|
305 |
+
return [self.distance(x, y) for x, y in zip(x_gram, y_gram)]
|
306 |
+
|
307 |
+
@staticmethod
|
308 |
+
def gram_matrix(x: torch.Tensor) -> torch.Tensor:
|
309 |
+
r"""Compute Gram matrix for batch of features.
|
310 |
+
|
311 |
+
Args:
|
312 |
+
x: Tensor. Shape :math:`(N, C, H, W)`.
|
313 |
+
|
314 |
+
Returns:
|
315 |
+
Gram matrix for given input
|
316 |
+
"""
|
317 |
+
B, C, H, W = x.size()
|
318 |
+
gram = []
|
319 |
+
for i in range(B):
|
320 |
+
features = x[i].view(C, H * W)
|
321 |
+
|
322 |
+
# Add fake channel dimension
|
323 |
+
gram.append(torch.mm(features, features.t()).unsqueeze(0))
|
324 |
+
|
325 |
+
return torch.stack(gram)
|
326 |
+
|
327 |
+
|
328 |
+
class LPIPS(ContentLoss):
|
329 |
+
r"""Learned Perceptual Image Patch Similarity metric. Only VGG16 learned weights are supported.
|
330 |
+
|
331 |
+
By default expects input to be in range [0, 1], which is then normalized by ImageNet statistics into range [-1, 1].
|
332 |
+
If no normalisation is required, change `mean` and `std` values accordingly.
|
333 |
+
|
334 |
+
Args:
|
335 |
+
replace_pooling: Flag to replace MaxPooling layer with AveragePooling. See references for details.
|
336 |
+
distance: Method to compute distance between features: ``'mse'`` | ``'mae'``.
|
337 |
+
reduction: Specifies the reduction type:
|
338 |
+
``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
|
339 |
+
mean: List of float values used for data standardization. Default: ImageNet mean.
|
340 |
+
If there is no need to normalize data, use [0., 0., 0.].
|
341 |
+
std: List of float values used for data standardization. Default: ImageNet std.
|
342 |
+
If there is no need to normalize data, use [1., 1., 1.].
|
343 |
+
|
344 |
+
Examples:
|
345 |
+
>>> loss = LPIPS()
|
346 |
+
>>> x = torch.rand(3, 3, 256, 256, requires_grad=True)
|
347 |
+
>>> y = torch.rand(3, 3, 256, 256)
|
348 |
+
>>> output = loss(x, y)
|
349 |
+
>>> output.backward()
|
350 |
+
|
351 |
+
References:
|
352 |
+
Gatys, Leon and Ecker, Alexander and Bethge, Matthias (2016).
|
353 |
+
A Neural Algorithm of Artistic Style
|
354 |
+
Association for Research in Vision and Ophthalmology (ARVO)
|
355 |
+
https://arxiv.org/abs/1508.06576
|
356 |
+
|
357 |
+
Zhang, Richard and Isola, Phillip and Efros, et al. (2018)
|
358 |
+
The Unreasonable Effectiveness of Deep Features as a Perceptual Metric
|
359 |
+
IEEE/CVF Conference on Computer Vision and Pattern Recognition
|
360 |
+
https://arxiv.org/abs/1801.03924
|
361 |
+
https://github.com/richzhang/PerceptualSimilarity
|
362 |
+
"""
|
363 |
+
_weights_url = "https://github.com/photosynthesis-team/" + \
|
364 |
+
"photosynthesis.metrics/releases/download/v0.4.0/lpips_weights.pt"
|
365 |
+
|
366 |
+
def __init__(self, replace_pooling: bool = False, distance: str = "mse", reduction: str = "mean",
|
367 |
+
mean: List[float] = IMAGENET_MEAN, std: List[float] = IMAGENET_STD, ) -> None:
|
368 |
+
lpips_layers = ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']
|
369 |
+
lpips_weights = torch.hub.load_state_dict_from_url(self._weights_url, progress=False)
|
370 |
+
super().__init__("vgg16", layers=lpips_layers, weights=lpips_weights,
|
371 |
+
replace_pooling=replace_pooling, distance=distance,
|
372 |
+
reduction=reduction, mean=mean, std=std,
|
373 |
+
normalize_features=True)
|
374 |
+
|
375 |
+
|
376 |
+
class DISTS(ContentLoss):
|
377 |
+
r"""Deep Image Structure and Texture Similarity metric.
|
378 |
+
|
379 |
+
By default expects input to be in range [0, 1], which is then normalized by ImageNet statistics into range [-1, 1].
|
380 |
+
If no normalisation is required, change `mean` and `std` values accordingly.
|
381 |
+
|
382 |
+
Args:
|
383 |
+
reduction: Specifies the reduction type:
|
384 |
+
``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
|
385 |
+
mean: List of float values used for data standardization. Default: ImageNet mean.
|
386 |
+
If there is no need to normalize data, use [0., 0., 0.].
|
387 |
+
std: List of float values used for data standardization. Default: ImageNet std.
|
388 |
+
If there is no need to normalize data, use [1., 1., 1.].
|
389 |
+
|
390 |
+
Examples:
|
391 |
+
>>> loss = DISTS()
|
392 |
+
>>> x = torch.rand(3, 3, 256, 256, requires_grad=True)
|
393 |
+
>>> y = torch.rand(3, 3, 256, 256)
|
394 |
+
>>> output = loss(x, y)
|
395 |
+
>>> output.backward()
|
396 |
+
|
397 |
+
References:
|
398 |
+
Keyan Ding, Kede Ma, Shiqi Wang, Eero P. Simoncelli (2020).
|
399 |
+
Image Quality Assessment: Unifying Structure and Texture Similarity.
|
400 |
+
https://arxiv.org/abs/2004.07728
|
401 |
+
https://github.com/dingkeyan93/DISTS
|
402 |
+
"""
|
403 |
+
_weights_url = "https://github.com/photosynthesis-team/piq/releases/download/v0.4.1/dists_weights.pt"
|
404 |
+
|
405 |
+
def __init__(self, reduction: str = "mean", mean: List[float] = IMAGENET_MEAN,
|
406 |
+
std: List[float] = IMAGENET_STD) -> None:
|
407 |
+
dists_layers = ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3']
|
408 |
+
channels = [3, 64, 128, 256, 512, 512]
|
409 |
+
|
410 |
+
weights = torch.hub.load_state_dict_from_url(self._weights_url, progress=False)
|
411 |
+
dists_weights = list(torch.split(weights['alpha'], channels, dim=1))
|
412 |
+
dists_weights.extend(torch.split(weights['beta'], channels, dim=1))
|
413 |
+
|
414 |
+
super().__init__("vgg16", layers=dists_layers, weights=dists_weights,
|
415 |
+
replace_pooling=True, reduction=reduction, mean=mean, std=std,
|
416 |
+
normalize_features=False, allow_layers_weights_mismatch=True)
|
417 |
+
|
418 |
+
def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
|
419 |
+
r"""
|
420 |
+
|
421 |
+
Args:
|
422 |
+
x: An input tensor. Shape :math:`(N, C, H, W)`.
|
423 |
+
y: A target tensor. Shape :math:`(N, C, H, W)`.
|
424 |
+
|
425 |
+
Returns:
|
426 |
+
Deep Image Structure and Texture Similarity loss, i.e. ``1-DISTS`` in range [0, 1].
|
427 |
+
"""
|
428 |
+
_, _, H, W = x.shape
|
429 |
+
|
430 |
+
if min(H, W) > 256:
|
431 |
+
x = torch.nn.functional.interpolate(
|
432 |
+
x, scale_factor=256 / min(H, W), recompute_scale_factor=False, mode='bilinear')
|
433 |
+
y = torch.nn.functional.interpolate(
|
434 |
+
y, scale_factor=256 / min(H, W), recompute_scale_factor=False, mode='bilinear')
|
435 |
+
|
436 |
+
loss = super().forward(x, y)
|
437 |
+
return 1 - loss
|
438 |
+
|
439 |
+
def compute_distance(self, x_features: torch.Tensor, y_features: torch.Tensor) -> List[torch.Tensor]:
|
440 |
+
r"""Compute structure similarity between feature maps
|
441 |
+
|
442 |
+
Args:
|
443 |
+
x_features: Features of the input tensor.
|
444 |
+
y_features: Features of the target tensor.
|
445 |
+
|
446 |
+
Returns:
|
447 |
+
Structural similarity distance between feature maps
|
448 |
+
"""
|
449 |
+
structure_distance, texture_distance = [], []
|
450 |
+
# Small constant for numerical stability
|
451 |
+
EPS = 1e-6
|
452 |
+
|
453 |
+
for x, y in zip(x_features, y_features):
|
454 |
+
x_mean = x.mean([2, 3], keepdim=True)
|
455 |
+
y_mean = y.mean([2, 3], keepdim=True)
|
456 |
+
structure_distance.append(similarity_map(x_mean, y_mean, constant=EPS))
|
457 |
+
|
458 |
+
x_var = ((x - x_mean) ** 2).mean([2, 3], keepdim=True)
|
459 |
+
y_var = ((y - y_mean) ** 2).mean([2, 3], keepdim=True)
|
460 |
+
xy_cov = (x * y).mean([2, 3], keepdim=True) - x_mean * y_mean
|
461 |
+
texture_distance.append((2 * xy_cov + EPS) / (x_var + y_var + EPS))
|
462 |
+
|
463 |
+
return structure_distance + texture_distance
|
464 |
+
|
465 |
+
def get_features(self, x: torch.Tensor) -> List[torch.Tensor]:
|
466 |
+
r"""
|
467 |
+
|
468 |
+
Args:
|
469 |
+
x: Input tensor
|
470 |
+
|
471 |
+
Returns:
|
472 |
+
List of features extracted from input tensor
|
473 |
+
"""
|
474 |
+
features = super().get_features(x)
|
475 |
+
|
476 |
+
# Add input tensor as an additional feature
|
477 |
+
features.insert(0, x)
|
478 |
+
return features
|
479 |
+
|
480 |
+
def replace_pooling(self, module: torch.nn.Module) -> torch.nn.Module:
|
481 |
+
r"""Turn All MaxPool layers into L2Pool
|
482 |
+
|
483 |
+
Args:
|
484 |
+
module: Module to change MaxPool into L2Pool
|
485 |
+
|
486 |
+
Returns:
|
487 |
+
Module with L2Pool instead of MaxPool
|
488 |
+
"""
|
489 |
+
module_output = module
|
490 |
+
if isinstance(module, torch.nn.MaxPool2d):
|
491 |
+
module_output = L2Pool2d(kernel_size=3, stride=2, padding=1)
|
492 |
+
|
493 |
+
for name, child in module.named_children():
|
494 |
+
module_output.add_module(name, self.replace_pooling(child))
|
495 |
+
|
496 |
+
return module_output
|
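For orientation, here is a minimal usage sketch of the perceptual losses added above. It is not part of the diff: the import path `libs.metric.piq.perceptual` is assumed from the repository layout, and the LPIPS/DISTS pretrained weights are fetched from the URLs hard-coded in the classes.

```python
# Hedged sketch: exercises ContentLoss / StyleLoss / LPIPS / DISTS on random images in [0, 1].
import torch
from libs.metric.piq.perceptual import ContentLoss, StyleLoss, LPIPS, DISTS

x = torch.rand(4, 3, 256, 256, requires_grad=True)  # prediction
y = torch.rand(4, 3, 256, 256)                      # target

content = ContentLoss()(x, y)   # VGG feature distance with the default layers
style = StyleLoss()(x, y)       # Gram-matrix distance on the same features
lpips = LPIPS()(x, y)           # downloads LPIPS VGG16 weights on first use
dists = DISTS()(x, y)           # returns 1 - DISTS similarity

total = content + style + lpips + dists
total.backward()                # all four losses are differentiable w.r.t. x
```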
libs/metric/piq/utils/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .common import _validate_input, _reduce, _parse_version

__all__ = [
    "_validate_input",
    "_reduce",
    "_parse_version",
]
libs/metric/piq/utils/common.py
ADDED
@@ -0,0 +1,158 @@
import torch
import re
import warnings

from typing import Tuple, List, Optional, Union, Dict, Any

SEMVER_VERSION_PATTERN = re.compile(
    r"""
    ^
    (?P<major>0|[1-9]\d*)
    \.
    (?P<minor>0|[1-9]\d*)
    \.
    (?P<patch>0|[1-9]\d*)
    (?:-(?P<prerelease>
        (?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)
        (?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*
    ))?
    (?:\+(?P<build>
        [0-9a-zA-Z-]+
        (?:\.[0-9a-zA-Z-]+)*
    ))?
    $
    """,
    re.VERBOSE,
)


PEP_440_VERSION_PATTERN = r"""
    v?
    (?:
        (?:(?P<epoch>[0-9]+)!)?                           # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
        (?P<pre>                                          # pre-release
            [-_\.]?
            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
"""


def _validate_input(
        tensors: List[torch.Tensor],
        dim_range: Tuple[int, int] = (0, -1),
        data_range: Tuple[float, float] = (0., -1.),
        # size_dim_range: Tuple[float, float] = (0., -1.),
        size_range: Optional[Tuple[int, int]] = None,
) -> None:
    r"""Check that input(-s) satisfies the requirements
    Args:
        tensors: Tensors to check
        dim_range: Allowed number of dimensions. (min, max)
        data_range: Allowed range of values in tensors. (min, max)
        size_range: Dimensions to include in size comparison. (start_dim, end_dim + 1)
    """

    if not __debug__:
        return

    x = tensors[0]

    for t in tensors:
        assert torch.is_tensor(t), f'Expected torch.Tensor, got {type(t)}'
        assert t.device == x.device, f'Expected tensors to be on {x.device}, got {t.device}'

        if size_range is None:
            assert t.size() == x.size(), f'Expected tensors with same size, got {t.size()} and {x.size()}'
        else:
            assert t.size()[size_range[0]: size_range[1]] == x.size()[size_range[0]: size_range[1]], \
                f'Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}'

        if dim_range[0] == dim_range[1]:
            assert t.dim() == dim_range[0], f'Expected number of dimensions to be {dim_range[0]}, got {t.dim()}'
        elif dim_range[0] < dim_range[1]:
            assert dim_range[0] <= t.dim() <= dim_range[1], \
                f'Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}'

        if data_range[0] < data_range[1]:
            assert data_range[0] <= t.min(), \
                f'Expected values to be greater or equal to {data_range[0]}, got {t.min()}'
            assert t.max() <= data_range[1], \
                f'Expected values to be lower or equal to {data_range[1]}, got {t.max()}'


def _reduce(x: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
    r"""Reduce input in batch dimension if needed.

    Args:
        x: Tensor with shape (N, *).
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'``
    """
    if reduction == 'none':
        return x
    elif reduction == 'mean':
        return x.mean(dim=0)
    elif reduction == 'sum':
        return x.sum(dim=0)
    else:
        raise ValueError("Unknown reduction. Expected one of {'none', 'mean', 'sum'}")


def _parse_version(version: Union[str, bytes]) -> Tuple[int, ...]:
    """Parses valid Python versions according to Semver and PEP 440 specifications.
    For more on Semver check: https://semver.org/
    For more on PEP 440 check: https://www.python.org/dev/peps/pep-0440/.

    Implementation is inspired by:
    - https://github.com/python-semver
    - https://github.com/pypa/packaging

    Args:
        version: unparsed information about the library of interest.

    Returns:
        parsed information about the library of interest.
    """
    if isinstance(version, bytes):
        version = version.decode("UTF-8")
    elif not isinstance(version, str) and not isinstance(version, bytes):
        raise TypeError(f"not expecting type {type(version)}")

    # Semver processing
    match = SEMVER_VERSION_PATTERN.match(version)
    if match:
        matched_version_parts: Dict[str, Any] = match.groupdict()
        release = tuple([int(matched_version_parts[k]) for k in ['major', 'minor', 'patch']])
        return release

    # PEP 440 processing
    regex = re.compile(r"^\s*" + PEP_440_VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
    match = regex.search(version)

    if match is None:
        warnings.warn(f"{version} is not a valid SemVer or PEP 440 string")
        return tuple()

    release = tuple(int(i) for i in match.group("release").split("."))
    return release
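A short illustration of how these helpers behave (illustrative only, not part of the diff; the import goes through the package `__init__` shown earlier):

```python
import torch
from libs.metric.piq.utils import _validate_input, _reduce, _parse_version

x = torch.rand(2, 3, 64, 64)
y = torch.rand(2, 3, 64, 64)
_validate_input([x, y], dim_range=(4, 4), data_range=(0., 1.))  # passes silently

scores = torch.tensor([0.3, 0.7])
print(_reduce(scores, reduction='mean'))  # tensor(0.5000)

print(_parse_version('1.13.1'))           # (1, 13, 1) via the SemVer branch
print(_parse_version('2.0.0+cu118'))      # (2, 0, 0); build metadata is ignored
print(_parse_version('not-a-version'))    # () plus a warning
```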
libs/metric/pytorch_fid/__init__.py
ADDED
@@ -0,0 +1,54 @@
__version__ = '0.3.0'

import torch
from einops import rearrange, repeat

from .inception import InceptionV3
from .fid_score import calculate_frechet_distance


class PytorchFIDFactory(torch.nn.Module):
    """Convenience wrapper that computes FID between two batches of images.

    Args:
        channels: Number of image channels; single-channel inputs are repeated to 3 channels.
        inception_block_idx: Dimensionality of the Inception features to use; must be a key of
            ``InceptionV3.BLOCK_INDEX_BY_DIM`` (e.g. 2048 for the final average pooling block).

    Examples:
        >>> fid_factory = PytorchFIDFactory()
        >>> fid_score = fid_factory.score(real_samples=data, fake_samples=all_images)
        >>> print(fid_score)
    """

    def __init__(self, channels: int = 3, inception_block_idx: int = 2048):
        super().__init__()
        self.channels = channels

        # load models
        assert inception_block_idx in InceptionV3.BLOCK_INDEX_BY_DIM
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[inception_block_idx]
        self.inception_v3 = InceptionV3([block_idx])

    @torch.no_grad()
    def calculate_activation_statistics(self, samples):
        features = self.inception_v3(samples)[0]
        features = rearrange(features, '... 1 1 -> ...')

        mu = torch.mean(features, dim=0).cpu()
        sigma = torch.cov(features).cpu()
        return mu, sigma

    def score(self, real_samples, fake_samples):
        if self.channels == 1:
            real_samples, fake_samples = map(
                lambda t: repeat(t, 'b 1 ... -> b c ...', c=3), (real_samples, fake_samples)
            )

        min_batch = min(real_samples.shape[0], fake_samples.shape[0])
        real_samples, fake_samples = map(lambda t: t[:min_batch], (real_samples, fake_samples))

        m1, s1 = self.calculate_activation_statistics(real_samples)
        m2, s2 = self.calculate_activation_statistics(fake_samples)

        fid_value = calculate_frechet_distance(m1, s1, m2, s2)
        return fid_value
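The factory above can be exercised end to end with random tensors; the resulting number is meaningless for random data, but the snippet shows the expected call signature (a sketch, not part of the diff):

```python
import torch
from libs.metric.pytorch_fid import PytorchFIDFactory

fid_factory = PytorchFIDFactory(channels=3, inception_block_idx=2048)

real = torch.rand(16, 3, 299, 299)  # batches of images in [0, 1]
fake = torch.rand(16, 3, 299, 299)

# InceptionV3 FID weights are downloaded on first use.
fid_score = fid_factory.score(real_samples=real, fake_samples=fake)
print(fid_score)
```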
libs/metric/pytorch_fid/fid_score.py
ADDED
@@ -0,0 +1,322 @@
"""Calculates the Frechet Inception Distance (FID) to evaluate GANs

The FID metric calculates the distance between two distributions of images.
Typically, we have summary statistics (mean & covariance matrix) of one
of these distributions, while the 2nd distribution is given by a GAN.

When run as a stand-alone program, it compares the distribution of
images that are stored as PNG/JPEG at a specified location with a
distribution given by summary statistics (in pickle format).

The FID is calculated by assuming that X_1 and X_2 are the activations of
the pool_3 layer of the inception net for generated samples and real world
samples respectively.

See --help to see further details.

Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
of Tensorflow

Copyright 2018 Institute of Bioinformatics, JKU Linz

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pathlib
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

import numpy as np
import torch
import torchvision.transforms as TF
from PIL import Image
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d

try:
    from tqdm import tqdm
except ImportError:
    # If tqdm is not available, provide a mock version of it
    def tqdm(x):
        return x

from .inception import InceptionV3

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--batch-size', type=int, default=50,
                    help='Batch size to use')
parser.add_argument('--num-workers', type=int,
                    help=('Number of processes to use for data loading. '
                          'Defaults to `min(8, num_cpus)`'))
parser.add_argument('--device', type=str, default=None,
                    help='Device to use. Like cuda, cuda:0 or cpu')
parser.add_argument('--dims', type=int, default=2048,
                    choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
                    help=('Dimensionality of Inception features to use. '
                          'By default, uses pool3 features'))
parser.add_argument('--save-stats', action='store_true',
                    help=('Generate an npz archive from a directory of samples. '
                          'The first path is used as input and the second as output.'))
parser.add_argument('path', type=str, nargs=2,
                    help=('Paths to the generated images or '
                          'to .npz statistic files'))

IMAGE_EXTENSIONS = {'bmp', 'jpg', 'jpeg', 'pgm', 'png', 'ppm',
                    'tif', 'tiff', 'webp'}


class ImagePathDataset(torch.utils.data.Dataset):
    def __init__(self, files, transforms=None):
        self.files = files
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        path = self.files[i]
        img = Image.open(path).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img


def get_activations(files, model, batch_size=50, dims=2048, device='cpu',
                    num_workers=1):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers

    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    dataset = ImagePathDataset(files, transforms=TF.ToTensor())
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             drop_last=False,
                                             num_workers=num_workers)

    pred_arr = np.empty((len(files), dims))

    start_idx = 0

    for batch in tqdm(dataloader):
        batch = batch.to(device)

        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.size(2) != 1 or pred.size(3) != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred = pred.squeeze(3).squeeze(2).cpu().numpy()

        pred_arr[start_idx:start_idx + pred.shape[0]] = pred

        start_idx = start_idx + pred.shape[0]

    return pred_arr


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative data set.

    Returns:
    --   : The Frechet Distance.
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1)
            + np.trace(sigma2) - 2 * tr_covmean)


def calculate_activation_statistics(files, model, batch_size=50, dims=2048,
                                    device='cpu', num_workers=1):
    """Calculation of the statistics used by the FID.
    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size
                     depends on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- device      : Device to run calculations
    -- num_workers : Number of parallel dataloader workers

    Returns:
    -- mu    : The mean over samples of the activations of the pool_3 layer of
               the inception model.
    -- sigma : The covariance matrix of the activations of the pool_3 layer of
               the inception model.
    """
    act = get_activations(files, model, batch_size, dims, device, num_workers)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def compute_statistics_of_path(path, model, batch_size, dims, device,
                               num_workers=1):
    if path.endswith('.npz'):
        with np.load(path) as f:
            m, s = f['mu'][:], f['sigma'][:]
    else:
        path = pathlib.Path(path)
        files = sorted([file for ext in IMAGE_EXTENSIONS
                        for file in path.glob('*.{}'.format(ext))])
        m, s = calculate_activation_statistics(files, model, batch_size,
                                               dims, device, num_workers)

    return m, s


def calculate_fid_given_paths(paths, batch_size, device, dims, num_workers=1):
    """Calculates the FID of two paths"""
    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]

    model = InceptionV3([block_idx]).to(device)

    m1, s1 = compute_statistics_of_path(paths[0], model, batch_size,
                                        dims, device, num_workers)
    m2, s2 = compute_statistics_of_path(paths[1], model, batch_size,
                                        dims, device, num_workers)
    fid_value = calculate_frechet_distance(m1, s1, m2, s2)

    return fid_value


def save_fid_stats(paths, batch_size, device, dims, num_workers=1):
    """Saves FID statistics of one path to an npz file"""
    if not os.path.exists(paths[0]):
        raise RuntimeError('Invalid path: %s' % paths[0])

    if os.path.exists(paths[1]):
        raise RuntimeError('Existing output file: %s' % paths[1])

    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]

    model = InceptionV3([block_idx]).to(device)

    print(f"Saving statistics for {paths[0]}")

    m1, s1 = compute_statistics_of_path(paths[0], model, batch_size,
                                        dims, device, num_workers)

    np.savez_compressed(paths[1], mu=m1, sigma=s1)


def main():
    args = parser.parse_args()

    if args.device is None:
        device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    else:
        device = torch.device(args.device)

    if args.num_workers is None:
        try:
            num_cpus = len(os.sched_getaffinity(0))
        except AttributeError:
            # os.sched_getaffinity is not available under Windows, use
            # os.cpu_count instead (which may not return the *available* number
            # of CPUs).
            num_cpus = os.cpu_count()

        num_workers = min(num_cpus, 8) if num_cpus is not None else 0
    else:
        num_workers = args.num_workers

    if args.save_stats:
        save_fid_stats(args.path, args.batch_size, device, args.dims, num_workers)
        return

    fid_value = calculate_fid_given_paths(args.path,
                                          args.batch_size,
                                          device,
                                          args.dims,
                                          num_workers)
    print('FID: ', fid_value)


if __name__ == '__main__':
    main()
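Besides the command-line entry point (roughly `python -m libs.metric.pytorch_fid.fid_score DIR1 DIR2`, assuming the package is importable), the same computation is available programmatically. The directory names in this sketch are placeholders:

```python
import torch
from libs.metric.pytorch_fid.fid_score import calculate_fid_given_paths

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Each path is either a directory of images or a precomputed .npz statistics file.
fid = calculate_fid_given_paths(['path/to/real_images', 'path/to/generated_images'],
                                batch_size=50, device=device, dims=2048, num_workers=2)
print('FID:', fid)
```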
libs/metric/pytorch_fid/inception.py
ADDED
@@ -0,0 +1,341 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

# Inception weights ported to Pytorch from
# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'  # noqa: E501


class InceptionV3(nn.Module):
    """Pretrained InceptionV3 network returning feature maps"""

    # Index of default block of inception to return,
    # corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,    # First max pooling features
        192: 1,   # Second max pooling features
        768: 2,   # Pre-aux classifier features
        2048: 3   # Final average pooling features
    }

    def __init__(self,
                 output_blocks=(DEFAULT_BLOCK_INDEX,),
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : list of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradients. Possibly useful
            for finetuning the network
        use_fid_inception : bool
            If true, uses the pretrained Inception model used in Tensorflow's
            FID implementation. If false, uses the pretrained Inception model
            available in torchvision. The FID Inception model has different
            weights and a slightly different structure from torchvision's
            Inception model. If you want to compute FID scores, you are
            strongly advised to set this parameter to true to get comparable
            results.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3()
        else:
            inception = _inception_v3(weights='DEFAULT')

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        for param in self.parameters():
            param.requires_grad = requires_grad

    def forward(self, inp):
        """Get Inception feature maps

        Parameters
        ----------
        inp : torch.autograd.Variable
            Input tensor of shape Bx3xHxW. Values are expected to be in
            range (0, 1)

        Returns
        -------
        List of torch.autograd.Variable, corresponding to the selected output
        block, sorted ascending by index
        """
        outp = []
        x = inp

        if self.resize_input:
            x = F.interpolate(x,
                              size=(299, 299),
                              mode='bilinear',
                              align_corners=False)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)

            if idx == self.last_needed_block:
                break

        return outp


def _inception_v3(*args, **kwargs):
    """Wraps `torchvision.models.inception_v3`"""
    try:
        version = tuple(map(int, torchvision.__version__.split('.')[:2]))
    except ValueError:
        # Just a caution against weird version strings
        version = (0,)

    # Skips default weight initialization if supported by torchvision
    # version. See https://github.com/mseitzer/pytorch-fid/issues/28.
    if version >= (0, 6):
        kwargs['init_weights'] = False

    # Backwards compatibility: `weights` argument was handled by `pretrained`
    # argument prior to version 0.13.
    if version < (0, 13) and 'weights' in kwargs:
        if kwargs['weights'] == 'DEFAULT':
            kwargs['pretrained'] = True
        elif kwargs['weights'] is None:
            kwargs['pretrained'] = False
        else:
            raise ValueError(
                'weights=={} not supported in torchvision {}'.format(
                    kwargs['weights'], torchvision.__version__
                )
            )
        del kwargs['weights']

    return torchvision.models.inception_v3(*args, **kwargs)


def fid_inception_v3():
    """Build pretrained Inception model for FID computation

    The Inception model for FID computation uses a different set of weights
    and has a slightly different structure than torchvision's Inception.

    This method first constructs torchvision's Inception and then patches the
    necessary parts that are different in the FID Inception model.
    """
    inception = _inception_v3(num_classes=1008,
                              aux_logits=False,
                              weights=None)
    inception.Mixed_5b = FIDInceptionA(192, pool_features=32)
    inception.Mixed_5c = FIDInceptionA(256, pool_features=64)
    inception.Mixed_5d = FIDInceptionA(288, pool_features=64)
    inception.Mixed_6b = FIDInceptionC(768, channels_7x7=128)
    inception.Mixed_6c = FIDInceptionC(768, channels_7x7=160)
    inception.Mixed_6d = FIDInceptionC(768, channels_7x7=160)
    inception.Mixed_6e = FIDInceptionC(768, channels_7x7=192)
    inception.Mixed_7b = FIDInceptionE_1(1280)
    inception.Mixed_7c = FIDInceptionE_2(2048)

    state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True)
    inception.load_state_dict(state_dict)
    return inception


class FIDInceptionA(torchvision.models.inception.InceptionA):
    """InceptionA block patched for FID computation"""
    def __init__(self, in_channels, pool_features):
        super(FIDInceptionA, self).__init__(in_channels, pool_features)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionC(torchvision.models.inception.InceptionC):
    """InceptionC block patched for FID computation"""
    def __init__(self, in_channels, channels_7x7):
        super(FIDInceptionC, self).__init__(in_channels, channels_7x7)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch7x7 = self.branch7x7_1(x)
        branch7x7 = self.branch7x7_2(branch7x7)
        branch7x7 = self.branch7x7_3(branch7x7)

        branch7x7dbl = self.branch7x7dbl_1(x)
        branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
        branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionE_1(torchvision.models.inception.InceptionE):
    """First InceptionE block patched for FID computation"""
    def __init__(self, in_channels):
        super(FIDInceptionE_1, self).__init__(in_channels)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = [
            self.branch3x3_2a(branch3x3),
            self.branch3x3_2b(branch3x3),
        ]
        branch3x3 = torch.cat(branch3x3, 1)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = [
            self.branch3x3dbl_3a(branch3x3dbl),
            self.branch3x3dbl_3b(branch3x3dbl),
        ]
        branch3x3dbl = torch.cat(branch3x3dbl, 1)

        # Patch: Tensorflow's average pool does not use the padded zeros in
        # its average calculation
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
                                   count_include_pad=False)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)


class FIDInceptionE_2(torchvision.models.inception.InceptionE):
    """Second InceptionE block patched for FID computation"""
    def __init__(self, in_channels):
        super(FIDInceptionE_2, self).__init__(in_channels)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = [
            self.branch3x3_2a(branch3x3),
            self.branch3x3_2b(branch3x3),
        ]
        branch3x3 = torch.cat(branch3x3, 1)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = [
            self.branch3x3dbl_3a(branch3x3dbl),
            self.branch3x3dbl_3b(branch3x3dbl),
        ]
        branch3x3dbl = torch.cat(branch3x3dbl, 1)

        # Patch: The FID Inception model uses max pooling instead of average
        # pooling. This is likely an error in this specific Inception
        # implementation, as other Inception models use average pooling here
        # (which matches the description in the paper).
        branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)
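A minimal sketch of using this wrapper as a feature extractor (the ported FID weights are downloaded on first instantiation; not part of the diff itself):

```python
import torch
from libs.metric.pytorch_fid.inception import InceptionV3

block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]  # final average pooling block
model = InceptionV3([block_idx]).eval()

images = torch.rand(8, 3, 299, 299)  # values expected in [0, 1]
with torch.no_grad():
    features = model(images)[0]      # shape (8, 2048, 1, 1)

print(features.squeeze(-1).squeeze(-1).shape)  # torch.Size([8, 2048])
```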
libs/modules/__init__.py
ADDED
@@ -0,0 +1 @@