HopooLinZ committed on
Commit 1d588ce · verified · 1 Parent(s): 05c00d8

Delete VA-Count-main

Files changed (35)
  1. VA-Count-main/VA-Count-main/FSC_pretrain.py +0 -380
  2. VA-Count-main/VA-Count-main/FSC_tain.py +0 -532
  3. VA-Count-main/VA-Count-main/FSC_test.py +0 -352
  4. VA-Count-main/VA-Count-main/LICENSE +0 -21
  5. VA-Count-main/VA-Count-main/README.md +0 -100
  6. VA-Count-main/VA-Count-main/__pycache__/models_crossvit.cpython-38.pyc +0 -0
  7. VA-Count-main/VA-Count-main/__pycache__/models_mae_cross.cpython-38.pyc +0 -0
  8. VA-Count-main/VA-Count-main/__pycache__/models_mae_noct.cpython-38.pyc +0 -0
  9. VA-Count-main/VA-Count-main/__pycache__/models_mae_noct.cpython-39.pyc +0 -0
  10. VA-Count-main/VA-Count-main/biclassify.py +0 -163
  11. VA-Count-main/VA-Count-main/datasetmake.py +0 -53
  12. VA-Count-main/VA-Count-main/figure.png +0 -0
  13. VA-Count-main/VA-Count-main/grounding_neg.py +0 -188
  14. VA-Count-main/VA-Count-main/grounding_pos.py +0 -141
  15. VA-Count-main/VA-Count-main/models_crossvit.py +0 -155
  16. VA-Count-main/VA-Count-main/models_mae_cross.py +0 -253
  17. VA-Count-main/VA-Count-main/models_mae_noct.py +0 -234
  18. VA-Count-main/VA-Count-main/requirements.txt +0 -15
  19. VA-Count-main/VA-Count-main/util/FSC147.py +0 -524
  20. VA-Count-main/VA-Count-main/util/__pycache__/FSC147.cpython-38.pyc +0 -0
  21. VA-Count-main/VA-Count-main/util/__pycache__/FSC147.cpython-39.pyc +0 -0
  22. VA-Count-main/VA-Count-main/util/__pycache__/FSC147_test.cpython-38.pyc +0 -0
  23. VA-Count-main/VA-Count-main/util/__pycache__/lr_sched.cpython-38.pyc +0 -0
  24. VA-Count-main/VA-Count-main/util/__pycache__/lr_sched.cpython-39.pyc +0 -0
  25. VA-Count-main/VA-Count-main/util/__pycache__/misc.cpython-38.pyc +0 -0
  26. VA-Count-main/VA-Count-main/util/__pycache__/misc.cpython-39.pyc +0 -0
  27. VA-Count-main/VA-Count-main/util/__pycache__/pos_embed.cpython-38.pyc +0 -0
  28. VA-Count-main/VA-Count-main/util/__pycache__/pos_embed.cpython-39.pyc +0 -0
  29. VA-Count-main/VA-Count-main/util/crop.py +0 -42
  30. VA-Count-main/VA-Count-main/util/datasets.py +0 -65
  31. VA-Count-main/VA-Count-main/util/lars.py +0 -47
  32. VA-Count-main/VA-Count-main/util/lr_decay.py +0 -76
  33. VA-Count-main/VA-Count-main/util/lr_sched.py +0 -21
  34. VA-Count-main/VA-Count-main/util/misc.py +0 -624
  35. VA-Count-main/VA-Count-main/util/pos_embed.py +0 -97
VA-Count-main/VA-Count-main/FSC_pretrain.py DELETED
@@ -1,380 +0,0 @@
1
- import argparse
2
- import datetime
3
- import json
4
-
5
- import PIL.Image
6
- import numpy as np
7
- import os
8
- import time
9
- import random
10
- from pathlib import Path
11
- import math
12
- import sys
13
- from PIL import Image
14
-
15
- import torch
16
- import torch.backends.cudnn as cudnn
17
- from torch.utils.tensorboard import SummaryWriter
18
- import torch.nn.functional as F
19
- from torch.utils.data import Dataset
20
- import wandb
21
- import timm
22
-
23
- assert "0.4.5" <= timm.__version__ <= "0.4.9" # version check
24
- import timm.optim.optim_factory as optim_factory
25
-
26
- import util.misc as misc
27
- from util.misc import NativeScalerWithGradNormCount as NativeScaler
28
- import util.lr_sched as lr_sched
29
- from util.FSC147 import transform_pre_train
30
- import models_mae_noct
31
-
32
-
33
- def get_args_parser():
34
- parser = argparse.ArgumentParser('MAE pre-training', add_help=False)
35
- parser.add_argument('--batch_size', default=8, type=int,
36
- help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus)')
37
- parser.add_argument('--epochs', default=200, type=int)
38
- parser.add_argument('--accum_iter', default=1, type=int,
39
- help='Accumulate gradient iterations (for increasing the effective batch size under memory constraints)')
40
-
41
- # Model parameters
42
- parser.add_argument('--model', default='mae_vit_base_patch16', type=str, metavar='MODEL',
43
- help='Name of model to train')
44
-
45
- parser.add_argument('--mask_ratio', default=0.5, type=float,
46
- help='Masking ratio (percentage of removed patches).')
47
-
48
- parser.add_argument('--norm_pix_loss', action='store_true',
49
- help='Use (per-patch) normalized pixels as targets for computing loss')
50
- parser.set_defaults(norm_pix_loss=False)
51
-
52
- # Optimizer parameters
53
- parser.add_argument('--weight_decay', type=float, default=0.05,
54
- help='weight decay (default: 0.05)')
55
- parser.add_argument('--lr', type=float, default=None, metavar='LR',
56
- help='learning rate (absolute lr)')
57
- parser.add_argument('--blr', type=float, default=1e-3, metavar='LR',
58
- help='base learning rate: absolute_lr = base_lr * total_batch_size / 256')
59
- parser.add_argument('--min_lr', type=float, default=0., metavar='LR',
60
- help='lower lr bound for cyclic schedulers that hit 0')
61
- parser.add_argument('--warmup_epochs', type=int, default=10, metavar='N',
62
- help='epochs to warmup LR')
63
-
64
- # Dataset parameters
65
- parser.add_argument('--data_path', default='./data/FSC147/', type=str,
66
- help='dataset path')
67
- parser.add_argument('--anno_file', default='annotation_FSC147_384.json', type=str,
68
- help='annotation json file')
69
- parser.add_argument('--data_split_file', default='Train_Test_Val_FSC_147.json', type=str,
70
- help='data split json file')
71
- parser.add_argument('--im_dir', default='images_384_VarV2', type=str,
72
- help='images directory')
73
- parser.add_argument('--gt_dir', default='gt_density_map_adaptive_384_VarV2', type=str,
74
- help='ground truth directory')
75
- parser.add_argument('--output_dir', default='./data/out/pre_4_dir',
76
- help='path where to save, empty for no saving')
77
- parser.add_argument('--device', default='cuda:5',
78
- help='device to use for training / testing')
79
- parser.add_argument('--seed', default=0, type=int)
80
- parser.add_argument('--resume', default='./weights/mae_pretrain_vit_base_full.pth', # mae_visualize_vit_base
81
- help='resume from checkpoint')
82
-
83
- # Training parameters
84
- parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
85
- help='start epoch')
86
- parser.add_argument('--num_workers', default=10, type=int)
87
- parser.add_argument('--pin_mem', action='store_true',
88
- help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
89
- parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
90
- parser.set_defaults(pin_mem=True)
91
-
92
- # Distributed training parameters
93
- parser.add_argument('--world_size', default=1, type=int,
94
- help='number of distributed processes')
95
- parser.add_argument('--local_rank', default=-1, type=int)
96
- parser.add_argument('--dist_on_itp', action='store_true')
97
- parser.add_argument('--dist_url', default='env://',
98
- help='url used to set up distributed training')
99
-
100
- # Logging parameters
101
- parser.add_argument('--log_dir', default='./logs/pre_4_dir',
102
- help='path where to tensorboard log')
103
- parser.add_argument("--title", default="CounTR_pretraining", type=str)
104
- parser.add_argument("--wandb", default="counting", type=str)
105
- parser.add_argument("--team", default="wsense", type=str)
106
- parser.add_argument("--wandb_id", default=None, type=str)
107
- parser.add_argument('--anno_file_negative', default='annotation_FSC147_negative1.json', type=str,
108
- help='annotation json file')
109
- return parser
110
-
111
-
112
- os.environ["CUDA_LAUNCH_BLOCKING"] = '5'
113
-
114
-
115
- class TrainData(Dataset):
116
- def __init__(self):
117
- self.img = data_split['train']
118
- random.shuffle(self.img)
119
- self.img_dir = im_dir
120
- self.TransformPreTrain = transform_pre_train(data_path)
121
-
122
- def __len__(self):
123
- return len(self.img)
124
-
125
- def __getitem__(self, idx):
126
- im_id = self.img[idx]
127
- anno = annotations[im_id]
128
- bboxes = anno['box_examples_coordinates']
129
- # box_coordinates = anno.get('box_examples_coordinates', {}) # 获取图像的边界框坐标信息
130
- # # print(box_coordinates)
131
- # # 获取第一个类别的边界框坐标列表
132
- # first_category = next(iter(box_coordinates), None)
133
- # # print(first_category)
134
- # first_category_bboxes = box_coordinates[first_category]
135
- # if first_category_bboxes:
136
- # # print(first_category_bboxes[0])
137
- # bboxes = first_category_bboxes[0]
138
- # else:
139
- # bboxes = []
140
- # # if first_category_bboxes:
141
- # # bboxes = first_category_bboxes[0]
142
- # # else:
143
- # # pass
144
-
145
-
146
- rects = list()
147
- for bbox in bboxes:
148
- x1 = bbox[0][0]
149
- y1 = bbox[0][1]
150
- x2 = bbox[2][0]
151
- y2 = bbox[2][1]
152
- rects.append([y1, x1, y2, x2])
153
-
154
- image = Image.open('{}/{}'.format(im_dir, im_id))
155
- image.load()
156
- density_path = gt_dir / (im_id.split(".jpg")[0] + ".npy")
157
- density = np.load(density_path).astype('float32')
158
- sample = {'image': image, 'lines_boxes': rects, 'gt_density': density}
159
- sample = self.TransformPreTrain(sample)
160
- return sample['image']
161
-
162
-
163
- def main(args):
164
- misc.init_distributed_mode(args)
165
-
166
- print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
167
- print("{}".format(args).replace(', ', ',\n'))
168
-
169
- device = torch.device(args.device)
170
-
171
- # fix the seed for reproducibility
172
- seed = args.seed + misc.get_rank()
173
- torch.manual_seed(seed)
174
- np.random.seed(seed)
175
-
176
- cudnn.benchmark = True
177
-
178
- dataset_train = TrainData()
179
- print(dataset_train)
180
-
181
- if True: # args.distributed:
182
- num_tasks = misc.get_world_size()
183
- global_rank = misc.get_rank()
184
- sampler_train = torch.utils.data.DistributedSampler(
185
- dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True
186
- )
187
- print("Sampler_train = %s" % str(sampler_train))
188
- else:
189
- sampler_train = torch.utils.data.RandomSampler(dataset_train)
190
-
191
- if global_rank == 0:
192
- if args.log_dir is not None:
193
- os.makedirs(args.log_dir, exist_ok=True)
194
- log_writer = SummaryWriter(log_dir=args.log_dir)
195
- else:
196
- log_writer = None
197
- if args.wandb is not None:
198
- wandb_run = wandb.init(
199
- config=args,
200
- resume="allow",
201
- project=args.wandb,
202
- name=args.title,
203
- # entity=args.team,
204
- tags=["CounTR", "pretraining"],
205
- id=args.wandb_id,
206
- )
207
- else:
208
- wandb_run = None
209
-
210
- data_loader_train = torch.utils.data.DataLoader(
211
- dataset_train, sampler=sampler_train,
212
- batch_size=args.batch_size,
213
- num_workers=args.num_workers,
214
- pin_memory=args.pin_mem,
215
- drop_last=False,
216
- )
217
-
218
- # define the model
219
- model = models_mae_noct.__dict__[args.model](norm_pix_loss=args.norm_pix_loss)
220
-
221
- model.to(device)
222
-
223
- model_without_ddp = model
224
-
225
- print("Model = %s" % str(model_without_ddp))
226
-
227
- eff_batch_size = args.batch_size * args.accum_iter * misc.get_world_size()
228
-
229
- if args.lr is None: # only base_lr is specified
230
- args.lr = args.blr * eff_batch_size / 256
231
-
232
- print("base lr: %.2e" % (args.lr * 256 / eff_batch_size))
233
- print("actual lr: %.2e" % args.lr)
234
-
235
- print("accumulate grad iterations: %d" % args.accum_iter)
236
- print("effective batch size: %d" % eff_batch_size)
237
-
238
- if args.distributed:
239
- model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
240
- model_without_ddp = model.module
241
-
242
- # following timm: set wd as 0 for bias and norm layers
243
- param_groups = optim_factory.add_weight_decay(model_without_ddp, args.weight_decay)
244
- optimizer = torch.optim.AdamW(param_groups, lr=args.lr, betas=(0.9, 0.95))
245
- print(optimizer)
246
- loss_scaler = NativeScaler()
247
-
248
- misc.load_model(args=args, model_without_ddp=model_without_ddp, optimizer=optimizer, loss_scaler=loss_scaler)
249
-
250
- print(f"Start training for {args.epochs} epochs")
251
- start_time = time.time()
252
- for epoch in range(args.start_epoch, args.epochs):
253
- if args.distributed:
254
- data_loader_train.sampler.set_epoch(epoch)
255
-
256
- # train one epoch
257
- model.train(True)
258
- metric_logger = misc.MetricLogger(delimiter=" ")
259
- metric_logger.add_meter('lr', misc.SmoothedValue(window_size=1, fmt='{value:.6f}'))
260
- header = 'Epoch: [{}]'.format(epoch)
261
- print_freq = 20
262
- accum_iter = args.accum_iter
263
-
264
- optimizer.zero_grad()
265
-
266
- if log_writer is not None:
267
- print('log_dir: {}'.format(log_writer.log_dir))
268
-
269
- model_ = getattr(models_mae_noct, args.model)()
270
-
271
- for data_iter_step, samples in enumerate(metric_logger.log_every(data_loader_train, print_freq, header)):
272
- epoch_1000x = int((data_iter_step / len(data_loader_train) + epoch) * 1000)
273
-
274
- if data_iter_step % accum_iter == 0:
275
- lr_sched.adjust_learning_rate(optimizer, data_iter_step / len(data_loader_train) + epoch, args)
276
-
277
- samples = samples.to(device, non_blocking=True)
278
-
279
- with torch.cuda.amp.autocast():
280
- loss, pred, mask = model(samples, mask_ratio=args.mask_ratio)
281
-
282
- loss_value = loss.item()
283
-
284
- if data_iter_step % 2000 == 0:
285
- preds = model_.unpatchify(pred)
286
- preds = preds.float()
287
- preds = torch.einsum('nchw->nhwc', preds)
288
- preds = torch.clip(preds, 0, 1)
289
-
290
- if log_writer is not None:
291
- log_writer.add_images('reconstruction', preds, int(epoch), dataformats='NHWC')
292
-
293
- if wandb_run is not None:
294
- wandb_images = []
295
- w_samples = torch.einsum('nchw->nhwc', samples.float()).clip(0, 1)
296
- masks = F.interpolate(
297
- mask.reshape(shape=(mask.shape[0], 1, int(mask.shape[1] ** .5), int(mask.shape[1] ** .5))),
298
- size=(preds.shape[1], preds.shape[2]))
299
- masks = torch.einsum('nchw->nhwc', masks.float())
300
- combos = (w_samples + masks.repeat(1, 1, 1, 3)).clip(0, 1)
301
- w_images = (torch.cat([w_samples, combos, preds], dim=2) * 255).detach().cpu()
302
- print("w_images:", w_samples.shape, combos.shape, preds.shape, "-->", w_images.shape)
303
-
304
- for i in range(w_images.shape[0]):
305
- wi = w_images[i, :, :, :]
306
- wandb_images += [wandb.Image(wi.numpy().astype(np.uint8),
307
- caption=f"Prediction {i} at epoch {epoch}")]
308
- wandb.log({f"reconstruction": wandb_images}, step=epoch_1000x, commit=False)
309
-
310
- if not math.isfinite(loss_value):
311
- print("Loss is {}, stopping training".format(loss_value))
312
- sys.exit(1)
313
-
314
- loss /= accum_iter
315
- loss_scaler(loss, optimizer, parameters=model.parameters(),
316
- update_grad=(data_iter_step + 1) % accum_iter == 0)
317
- if (data_iter_step + 1) % accum_iter == 0:
318
- optimizer.zero_grad()
319
-
320
- torch.cuda.synchronize()
321
-
322
- metric_logger.update(loss=loss_value)
323
-
324
- lr = optimizer.param_groups[0]["lr"]
325
- metric_logger.update(lr=lr)
326
-
327
- loss_value_reduce = misc.all_reduce_mean(loss_value)
328
- if (data_iter_step + 1) % accum_iter == 0:
329
- if log_writer is not None:
330
- """ We use epoch_1000x as the x-axis in tensorboard.
331
- This calibrates different curves when batch size changes.
332
- """
333
- log_writer.add_scalar('train_loss', loss_value_reduce, epoch_1000x)
334
- log_writer.add_scalar('lr', lr, epoch_1000x)
335
- if wandb_run is not None:
336
- log = {"train/loss": loss_value_reduce, "train/lr": lr}
337
- wandb.log(log, step=epoch_1000x, commit=True if data_iter_step == 0 else False)
338
-
339
- metric_logger.synchronize_between_processes()
340
- print("Averaged stats:", metric_logger)
341
- train_stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
342
-
343
- # save train status and model
344
- if args.output_dir and (epoch % 100 == 0 or epoch + 1 == args.epochs):
345
- misc.save_model(args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
346
- loss_scaler=loss_scaler, epoch=epoch, suffix=f"pretraining_{epoch}")
347
-
348
- log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
349
- 'epoch': epoch, }
350
-
351
- if args.output_dir and misc.is_main_process():
352
- if log_writer is not None:
353
- log_writer.flush()
354
- with open(os.path.join(args.output_dir, "log.txt"), mode="a", encoding="utf-8") as f:
355
- f.write(json.dumps(log_stats) + "\n")
356
-
357
- total_time = time.time() - start_time
358
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
359
- print('Training time {}'.format(total_time_str))
360
- wandb.run.finish()
361
-
362
-
363
- if __name__ == '__main__':
364
- args = get_args_parser()
365
- args = args.parse_args()
366
-
367
- # load data
368
- data_path = Path(args.data_path)
369
- anno_file = data_path / args.anno_file
370
- data_split_file = data_path / args.data_split_file
371
- im_dir = data_path / args.im_dir
372
- gt_dir = data_path / args.gt_dir
373
- with open(anno_file) as f:
374
- annotations = json.load(f)
375
- with open(data_split_file) as f:
376
- data_split = json.load(f)
377
-
378
- if args.output_dir:
379
- Path(args.output_dir).mkdir(parents=True, exist_ok=True)
380
- main(args)

VA-Count-main/VA-Count-main/FSC_tain.py DELETED
@@ -1,532 +0,0 @@
1
- import argparse
2
- import datetime
3
- import json
4
- import numpy as np
5
- import os
6
- import time
7
- import random
8
- from pathlib import Path
9
- import sys
10
- from PIL import Image
11
- import torch.nn.functional as F
12
- import torch
13
- import torch.backends.cudnn as cudnn
14
- from torch.utils.data import Dataset
15
- import torchvision
16
- import wandb
17
- import timm
18
- from tqdm import tqdm
19
-
20
- assert "0.4.5" <= timm.__version__ <= "0.4.9" # version check
21
- import timm.optim.optim_factory as optim_factory
22
-
23
- import util.misc as misc
24
- from util.misc import NativeScalerWithGradNormCount as NativeScaler
25
- import util.lr_sched as lr_sched
26
- from util.FSC147 import transform_train, transform_val
27
- import models_mae_cross
28
-
29
-
30
- def get_args_parser():
31
- parser = argparse.ArgumentParser('MAE pre-training', add_help=True)
32
- parser.add_argument('--batch_size', default=26, type=int,
33
- help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus)')
34
- parser.add_argument('--epochs', default=200, type=int)
35
- parser.add_argument('--accum_iter', default=1, type=int,
36
- help='Accumulate gradient iterations (for increasing the effective batch size under memory constraints)')
37
-
38
- # Model parameters
39
- parser.add_argument('--model', default='mae_vit_base_patch16', type=str, metavar='MODEL',
40
- help='Name of model to train')
41
- parser.add_argument('--mask_ratio', default=0.5, type=float,
42
- help='Masking ratio (percentage of removed patches).')
43
- parser.add_argument('--norm_pix_loss', action='store_true',
44
- help='Use (per-patch) normalized pixels as targets for computing loss')
45
- parser.set_defaults(norm_pix_loss=False)
46
-
47
- # Optimizer parameters
48
- parser.add_argument('--weight_decay', type=float, default=0.05,
49
- help='weight decay (default: 0.05)')
50
- parser.add_argument('--lr', type=float, default=None, metavar='LR',
51
- help='learning rate (absolute lr)')
52
- parser.add_argument('--blr', type=float, default=1e-3, metavar='LR',
53
- help='base learning rate: absolute_lr = base_lr * total_batch_size / 256')
54
- parser.add_argument('--min_lr', type=float, default=0., metavar='LR',
55
- help='lower lr bound for cyclic schedulers that hit 0')
56
- parser.add_argument('--warmup_epochs', type=int, default=10, metavar='N',
57
- help='epochs to warmup LR')
58
-
59
- # Dataset parameters
60
- parser.add_argument('--data_path', default='./data/FSC147/', type=str,
61
- help='dataset path')
62
- parser.add_argument('--anno_file', default='annotation_FSC147_pos.json', type=str,
63
- help='annotation json file for positive samples')
64
- parser.add_argument('--anno_file_negative', default='./data/FSC147/annotation_FSC147_neg.json', type=str,
65
- help='annotation json file for negative samples')
66
- parser.add_argument('--data_split_file', default='Train_Test_Val_FSC_147.json', type=str,
67
- help='data split json file')
68
- parser.add_argument('--class_file', default='ImageClasses_FSC147.txt', type=str,
69
- help='class json file')
70
- parser.add_argument('--im_dir', default='images_384_VarV2', type=str,
71
- help='images directory')
72
- parser.add_argument('--output_dir', default='./data/out/fim6_dir',
73
- help='path where to save, empty for no saving')
74
- parser.add_argument('--device', default='cuda',
75
- help='device to use for training / testing')
76
- parser.add_argument('--seed', default=0, type=int)
77
- parser.add_argument('--resume', default='./data/checkpoint.pth',
78
- help='resume from checkpoint')
79
- parser.add_argument('--do_resume', action='store_true',
80
- help='Resume training (e.g. if crashed).')
81
-
82
- # Training parameters
83
- parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
84
- help='start epoch')
85
- parser.add_argument('--num_workers', default=10, type=int)
86
- parser.add_argument('--pin_mem', action='store_true',
87
- help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
88
- parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
89
- parser.set_defaults(pin_mem=True)
90
- parser.add_argument('--do_aug', action='store_true',
91
- help='Perform data augmentation.')
92
- parser.add_argument('--no_do_aug', action='store_false', dest='do_aug')
93
- parser.set_defaults(do_aug=True)
94
-
95
- # Distributed training parameters
96
- parser.add_argument('--world_size', default=1, type=int,
97
- help='number of distributed processes')
98
- parser.add_argument('--local_rank', default=-1, type=int)
99
- parser.add_argument('--dist_on_itp', action='store_true')
100
- parser.add_argument('--dist_url', default='env://',
101
- help='url used to set up distributed training')
102
-
103
- # Logging parameters
104
- parser.add_argument("--title", default="count", type=str)
105
- parser.add_argument("--wandb", default="240227", type=str)
106
- parser.add_argument("--team", default="wsense", type=str)
107
- parser.add_argument("--wandb_id", default=None, type=str)
108
-
109
- return parser
110
-
111
-
112
- os.environ["CUDA_LAUNCH_BLOCKING"] = '0'
113
-
114
- class TrainData(Dataset):
115
- def __init__(self, args, split='train', do_aug=True):
116
- with open(args.anno_file) as f:
117
- annotations = json.load(f)
118
- # Load negative annotations
119
- with open(args.anno_file_negative) as f:
120
- neg_annotations = json.load(f)
121
- with open(args.data_split_file) as f:
122
- data_split = json.load(f)
123
-
124
- self.img = data_split[split]
125
- random.shuffle(self.img)
126
- self.split = split
127
- self.img_dir = im_dir
128
- self.TransformTrain = transform_train(args, do_aug=do_aug)
129
- self.TransformVal = transform_val(args)
130
- self.annotations = annotations
131
- self.neg_annotations = neg_annotations
132
- self.im_dir = im_dir
133
-
134
- def __len__(self):
135
- return len(self.img)
136
-
137
- def __getitem__(self, idx):
138
- im_id = self.img[idx]
139
- anno = self.annotations[im_id]
140
- bboxes = anno['box_examples_coordinates']
141
- dots = np.array(anno['points'])
142
-
143
- # Load the negative-sample boxes
144
- neg_anno = self.neg_annotations[im_id] # assumes every image ID has an entry in the negative annotations
145
- neg_bboxes = neg_anno['box_examples_coordinates']
146
-
147
- rects = list()
148
- for bbox in bboxes:
149
- x1 = bbox[0][0]
150
- y1 = bbox[0][1]
151
- x2 = bbox[2][0]
152
- y2 = bbox[2][1]
153
- if x1 < 0:
154
- x1 = 0
155
- if x2 < 0:
156
- x2 = 0
157
- if y1 < 0:
158
- y1 = 0
159
- if y2 < 0:
160
- y2 = 0
161
-
162
- rects.append([y1, x1, y2, x2])
163
- neg_rects = list()
164
- for neg_bbox in neg_bboxes:
165
- x1 = neg_bbox[0][0]
166
- y1 = neg_bbox[0][1]
167
- x2 = neg_bbox[2][0]
168
- y2 = neg_bbox[2][1]
169
- if x1 < 0:
170
- x1 = 0
171
- if x2 < 0:
172
- x2 = 0
173
- if y1 < 0:
174
- y1 = 0
175
- if y2 < 0:
176
- y2 = 0
177
-
178
- neg_rects.append([y1, x1, y2, x2])
179
-
180
- image = Image.open('{}/{}'.format(self.im_dir, im_id))
181
- if image.mode == "RGBA":
182
- image = image.convert("RGB")
183
- image.load()
184
- m_flag = 0
185
-
186
- sample = {'image': image, 'lines_boxes': rects, 'neg_lines_boxes': neg_rects,'dots': dots, 'id': im_id, 'm_flag': m_flag}
187
- sample = self.TransformTrain(sample) if self.split == "train" else self.TransformVal(sample)
188
- return sample['image'], sample['gt_density'], len(dots), sample['boxes'],sample['neg_boxes'], sample['pos'],sample['m_flag'], im_id
189
-
190
- def main(args):
191
- wandb_run = None
192
- try:
193
- misc.init_distributed_mode(args)
194
-
195
- print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
196
- print("{}".format(args).replace(', ', ',\n'))
197
-
198
- device = torch.device(args.device)
199
- # if torch.cuda.is_available():
200
- # device = torch.device("cuda:5")
201
-
202
- # fix the seed for reproducibility
203
- seed = args.seed + misc.get_rank()
204
- torch.manual_seed(seed)
205
- np.random.seed(seed)
206
- cudnn.benchmark = True
207
-
208
- dataset_train = TrainData(args, do_aug=args.do_aug)
209
- dataset_val = TrainData(args, split='val')
210
-
211
- num_tasks = misc.get_world_size()
212
- global_rank = misc.get_rank()
213
- sampler_train = torch.utils.data.DistributedSampler(
214
- dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True
215
- )
216
- sampler_val = torch.utils.data.DistributedSampler(
217
- dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=True
218
- )
219
-
220
- if global_rank == 0:
221
- if args.wandb is not None:
222
- wandb_run = wandb.init(
223
- config=args,
224
- resume="allow",
225
- project=args.wandb,
226
- name=args.title,
227
- # entity=args.team,
228
- tags=["count", "finetuning"],
229
- id=args.wandb_id,
230
- )
231
-
232
- data_loader_train = torch.utils.data.DataLoader(
233
- dataset_train, sampler=sampler_train,
234
- batch_size=args.batch_size,
235
- num_workers=args.num_workers,
236
- pin_memory=args.pin_mem,
237
- drop_last=False,
238
- )
239
- data_loader_val = torch.utils.data.DataLoader(
240
- dataset_val, sampler=sampler_val,
241
- batch_size=args.batch_size,
242
- num_workers=args.num_workers,
243
- pin_memory=args.pin_mem,
244
- drop_last=False,
245
- )
246
-
247
- # define the model
248
- model = models_mae_cross.__dict__[args.model](norm_pix_loss=args.norm_pix_loss)
249
- model.to(device)
250
- model_without_ddp = model
251
- # print("Model = %s" % str(model_without_ddp))
252
-
253
- eff_batch_size = args.batch_size * args.accum_iter * misc.get_world_size()
254
-
255
- if args.lr is None: # only base_lr is specified
256
- args.lr = args.blr * eff_batch_size / 256
257
-
258
- print("base lr: %.2e" % (args.lr * 256 / eff_batch_size))
259
- print("actual lr: %.2e" % args.lr)
260
-
261
- print("accumulate grad iterations: %d" % args.accum_iter)
262
- print("effective batch size: %d" % eff_batch_size)
263
-
264
- if args.distributed:
265
- model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
266
- model_without_ddp = model.module
267
-
268
- # following timm: set wd as 0 for bias and norm layers
269
- param_groups = optim_factory.add_weight_decay(model_without_ddp, args.weight_decay)
270
- optimizer = torch.optim.AdamW(param_groups, lr=args.lr, betas=(0.9, 0.95))
271
- print(optimizer)
272
-
273
- loss_scaler = NativeScaler()
274
-
275
- min_MAE = 99999
276
- print_freq = 50
277
- save_freq = 50
278
-
279
- misc.load_model_FSC_full(args=args, model_without_ddp=model_without_ddp, optimizer=optimizer, loss_scaler=loss_scaler)
280
-
281
- print(f"Start training for {args.epochs - args.start_epoch} epochs - rank {global_rank}")
282
- start_time = time.time()
283
- for epoch in range(args.start_epoch, args.epochs):
284
- if args.distributed:
285
- data_loader_train.sampler.set_epoch(epoch)
286
-
287
- # train one epoch
288
- model.train(True)
289
- accum_iter = args.accum_iter
290
-
291
- # some parameters in training
292
- train_mae = torch.tensor([0], dtype=torch.float64, device=device)
293
- train_mse = torch.tensor([0], dtype=torch.float64, device=device)
294
- val_mae = torch.tensor([0], dtype=torch.float64, device=device)
295
- val_mse = torch.tensor([0], dtype=torch.float64, device=device)
296
- val_nae = torch.tensor([0], dtype=torch.float64, device=device)
297
-
298
- optimizer.zero_grad()
299
-
300
- for data_iter_step, (samples, gt_density, _, pos_boxes, neg_boxes, pos, m_flag, im_names) in enumerate(
301
- tqdm(data_loader_train, total=len(data_loader_train), desc=f"Train [e. {epoch} - r. {global_rank}]")):
302
- idx = data_iter_step + (epoch * len(data_loader_train))
303
-
304
- if data_iter_step % accum_iter == 0:
305
- lr_sched.adjust_learning_rate(optimizer, data_iter_step / len(data_loader_train) + epoch, args)
306
-
307
- samples = samples.to(device, non_blocking=True, dtype=torch.half)
308
- gt_density = gt_density.to(device, non_blocking=True, dtype=torch.half)
309
- pos_boxes = pos_boxes.to(device, non_blocking=True, dtype=torch.half)
310
- neg_boxes = neg_boxes.to(device, non_blocking=True, dtype=torch.half)
311
-
312
- # If at least one image in the batch used Type 2 Mosaic, disallow 0-shot.
313
- flag = 0
314
- for i in range(m_flag.shape[0]):
315
- flag += m_flag[i].item()
316
- if flag == 0:
317
- shot_num = random.randint(0, 3)
318
- else:
319
- shot_num = random.randint(1, 3)
320
-
321
- with torch.cuda.amp.autocast():
322
- pos_output = model(samples, pos_boxes, shot_num) # positive-sample output
323
-
324
- # Compute the positive-sample loss over a random ~80% pixel mask
325
- mask = np.random.binomial(n=1, p=0.8, size=[384, 384])
326
- masks = np.tile(mask, (pos_output.shape[0], 1))
327
- masks = masks.reshape(pos_output.shape[0], 384, 384)
328
- masks = torch.from_numpy(masks).to(device)
329
- pos_loss = ((pos_output - gt_density) ** 2)
330
- pos_loss = (pos_loss * masks / (384 * 384)).sum() / pos_output.shape[0]
331
- # Negative-sample output
332
-
333
- with torch.cuda.amp.autocast():
334
- neg_output = model(samples, neg_boxes, 1) # negative-sample output
335
-
336
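- # Count-consistency terms, each squashed into [0, 1) via 1 - exp(-x):
- #   cnt1 penalises the gap between predicted and ground-truth total counts (density-map sums
- #   divided by 60, presumably the density-map scale factor inherited from CounTR);
- #   cnt2 pushes the mean count predicted from the negative exemplars towards one object per image.
- # Note that cnt is computed here but is not added to total_loss below.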
- cnt1 = 1-torch.exp(-(torch.abs(pos_output.sum()/60 - gt_density.sum()/60).mean()))
337
- if neg_output.shape[0] == 0:
338
- cnt2 = 0
339
- else:
340
- # cnt2 = torch.log(torch.abs((neg_output.sum() / neg_output.shape[0]) - 1).mean()+1)
341
- cnt2 = 1-torch.exp(-(torch.abs((neg_output.sum() / (neg_output.shape[0]*60)) - 1).mean()))
342
- cnt = cnt1+cnt2
343
-
344
- # Compute the negative-sample loss with a freshly drawn random pixel mask
345
- mask = np.random.binomial(n=1, p=0.8, size=[384, 384])
346
- masks = np.tile(mask, (neg_output.shape[0], 1))
347
- masks = masks.reshape(neg_output.shape[0], 384, 384)
348
- masks = torch.from_numpy(masks).to(device)
349
- neg_loss = ((neg_output - gt_density) ** 2)
350
- if neg_output.shape[0] == 0:
351
- neg_loss = 1
352
- else:
353
- neg_loss = (neg_loss * masks / (384 * 384)).sum() / neg_output.shape[0]
354
- margin = 0.5
355
- contrastive_loss = torch.relu(pos_loss - neg_loss + margin)
356
- total_loss = contrastive_loss+pos_loss
357
-
358
-
359
- # Update MAE and RMSE
360
- with torch.no_grad():
361
- pred_cnt = (pos_output.view(len(samples), -1)).sum(1) / 60
362
- gt_cnt = (gt_density.view(len(samples), -1)).sum(1) / 60
363
- cnt_err = torch.abs(pred_cnt - gt_cnt).float()
364
- batch_mae = cnt_err.double().mean()
365
- batch_mse = (cnt_err ** 2).double().mean()
366
-
367
- train_mae += batch_mae
368
- train_mse += batch_mse
369
-
370
- if not torch.isfinite(total_loss):
371
- print("Loss is {}, stopping training".format(total_loss))
372
- sys.exit(1)
373
-
374
- total_loss /= accum_iter
375
- loss_scaler(total_loss, optimizer, parameters=model.parameters(),
376
- update_grad=(data_iter_step + 1) % accum_iter == 0)
377
- if (data_iter_step + 1) % accum_iter == 0:
378
- optimizer.zero_grad()
379
-
380
- lr = optimizer.param_groups[0]["lr"]
381
- loss_value_reduce = misc.all_reduce_mean(total_loss)
382
-
383
- if (data_iter_step + 1) % (print_freq * accum_iter) == 0 and (data_iter_step + 1) != len(data_loader_train) and data_iter_step != 0:
384
- if wandb_run is not None:
385
- log = {"train/loss": loss_value_reduce,
386
- "train/lr": lr,
387
- "train/MAE": batch_mae,
388
- "train/RMSE": batch_mse ** 0.5}
389
- wandb.log(log, step=idx)
390
-
391
- # evaluation on Validation split
392
- for val_samples, val_gt_density, val_n_ppl, val_boxes,_, val_pos, _, val_im_names in \
393
- tqdm(data_loader_val, total=len(data_loader_val),
394
- desc=f"Val [e. {epoch} - r. {global_rank}]"):
395
-
396
- val_samples = val_samples.to(device, non_blocking=True, dtype=torch.half)
397
- val_gt_density = val_gt_density.to(device, non_blocking=True, dtype=torch.half)
398
- val_boxes = val_boxes.to(device, non_blocking=True, dtype=torch.half)
399
- val_n_ppl = val_n_ppl.to(device, non_blocking=True)
400
- shot_num = random.randint(0, 3)
401
-
402
- with torch.no_grad():
403
- with torch.cuda.amp.autocast():
404
- val_output = model(val_samples, val_boxes, shot_num)
405
-
406
- val_pred_cnt = (val_output.view(len(val_samples), -1)).sum(1) / 60
407
- val_gt_cnt = (val_gt_density.view(len(val_samples), -1)).sum(1) / 60
408
- # print('val_pred_cnt',val_pred_cnt)
409
- # print('val_gt_cnt',val_gt_cnt)
410
- val_cnt_err = torch.abs(val_pred_cnt - val_gt_cnt).float()
411
- # print('val_cnt_err',val_cnt_err.mean())
412
- val_cnt_err[val_cnt_err == float('inf')] = 0
413
- val_mae += val_cnt_err.double().mean()
414
-
415
- # val_mae += val_cnt_err
416
- # print('val_mae',val_mae.mean())
417
- val_cnt_err[val_cnt_err == float('inf')] = 0
418
- val_mse += (val_cnt_err ** 2).double().mean()
419
-
420
- # val_mse += (val_cnt_err ** 2)
421
- _val_nae = val_cnt_err / val_gt_cnt
422
- _val_nae[_val_nae == float('inf')] = 0
423
- val_nae += _val_nae.double().mean()
424
- # val_mae = val_mae/len(data_loader_val)
425
- # val_mse = val_mse/len(data_loader_val)
426
- # print('val_mae',val_mae)
427
- # print('val_mse',val_mse)
428
- # Output visualisation information to W&B
429
- if wandb_run is not None:
430
- train_wandb_densities = []
431
- train_wandb_bboxes = []
432
- val_wandb_densities = []
433
- val_wandb_bboxes = []
434
- black = torch.zeros([384, 384], device=device)
435
-
436
- for i in range(pos_output.shape[0]):
437
- # gt and predicted density
438
- w_d_map = torch.stack([pos_output[i], black, black])
439
- gt_map = torch.stack([gt_density[i], black, black])
440
- box_map = misc.get_box_map(samples[i], pos[i], device)
441
- w_gt_density = samples[i] / 2 + gt_map + box_map
442
- w_d_map_overlay = samples[i] / 2 + w_d_map
443
- w_densities = torch.cat([w_gt_density, w_d_map, w_d_map_overlay], dim=2)
444
- w_densities = torch.clamp(w_densities, 0, 1)
445
- train_wandb_densities += [wandb.Image(torchvision.transforms.ToPILImage()(w_densities),
446
- caption=f"[E#{epoch}] {im_names[i]} ({torch.sum(gt_density[i]).item()}, {torch.sum(pos_output[i]).item()})")]
447
-
448
- # exemplars
449
- w_boxes = torch.cat([pos_boxes[i][x, :, :, :] for x in range(pos_boxes[i].shape[0])], 2)
450
- train_wandb_bboxes += [wandb.Image(torchvision.transforms.ToPILImage()(w_boxes),
451
- caption=f"[E#{epoch}] {im_names[i]}")]
452
-
453
- for i in range(val_output.shape[0]):
454
- # gt and predicted density
455
- w_d_map = torch.stack([val_output[i], black, black])
456
- gt_map = torch.stack([val_gt_density[i], black, black])
457
- box_map = misc.get_box_map(val_samples[i], val_pos[i], device)
458
- w_gt_density = val_samples[i] / 2 + gt_map + box_map
459
- w_d_map_overlay = val_samples[i] / 2 + w_d_map
460
- w_densities = torch.cat([w_gt_density, w_d_map, w_d_map_overlay], dim=2)
461
- w_densities = torch.clamp(w_densities, 0, 1)
462
- val_wandb_densities += [wandb.Image(torchvision.transforms.ToPILImage()(w_densities),
463
- caption=f"[E#{epoch}] {val_im_names[i]} ({torch.sum(val_gt_density[i]).item()}, {torch.sum(val_output[i]).item()})")]
464
-
465
- # exemplars
466
- w_boxes = torch.cat([val_boxes[i][x, :, :, :] for x in range(val_boxes[i].shape[0])], 2)
467
- val_wandb_bboxes += [wandb.Image(torchvision.transforms.ToPILImage()(w_boxes),
468
- caption=f"[E#{epoch}] {val_im_names[i]}")]
469
-
470
- log = {"train/loss": loss_value_reduce,
471
- "train/lr": lr,
472
- "train/MAE": batch_mae,
473
- "train/RMSE": batch_mse ** 0.5,
474
- "val/MAE": val_mae / len(data_loader_val),
475
- "val/RMSE": (val_mse / len(data_loader_val)) ** 0.5,
476
- "val/NAE": val_nae / len(data_loader_val),
477
- "train_densities": train_wandb_densities,
478
- "val_densities": val_wandb_densities,
479
- "train_boxes": train_wandb_bboxes,
480
- "val_boxes": val_wandb_bboxes}
481
- wandb.log(log, step=idx)
482
-
483
- # save train status and model
484
- if args.output_dir and (epoch % save_freq == 0 or epoch + 1 == args.epochs) and epoch != 0:
485
- misc.save_model(
486
- args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
487
- loss_scaler=loss_scaler, epoch=epoch, suffix=f"finetuning_{epoch}", upload=epoch % 100 == 0)
488
- elif True:
489
- misc.save_model(
490
- args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
491
- loss_scaler=loss_scaler, epoch=epoch, suffix=f"finetuning_last", upload=False)
492
- if args.output_dir and val_mae / len(data_loader_val) < min_MAE:
493
- min_MAE = val_mae / len(data_loader_val)
494
- misc.save_model(
495
- args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
496
- loss_scaler=loss_scaler, epoch=epoch, suffix="finetuning_minMAE")
497
-
498
- print(f'[Train Epoch #{epoch}] - MAE: {train_mae.item() / len(data_loader_train):5.2f}, RMSE: {(train_mse.item() / len(data_loader_train)) ** 0.5:5.2f}', flush=True)
499
- print(f'[Val Epoch #{epoch}] - MAE: {val_mae.item() / len(data_loader_val):5.2f}, RMSE: {(val_mse.item() / len(data_loader_val)) ** 0.5:5.2f}, NAE: {val_nae.item() / len(data_loader_val):5.2f}', flush=True)
500
-
501
- total_time = time.time() - start_time
502
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
503
- print('Training time {}'.format(total_time_str))
504
-
505
- finally:
506
- if wandb_run is not None:
507
- wandb.run.finish()
508
-
509
-
510
- if __name__ == '__main__':
511
- args = get_args_parser()
512
- args = args.parse_args()
513
-
514
- data_path = Path(args.data_path)
515
- anno_file = data_path / args.anno_file
516
- data_split_file = data_path / args.data_split_file
517
- im_dir = data_path / args.im_dir
518
-
519
- if args.do_aug:
520
- class_file = data_path / args.class_file
521
- else:
522
- class_file = None
523
-
524
- args.anno_file = anno_file
525
- args.data_split_file = data_split_file
526
- args.im_dir = im_dir
527
- args.class_file = class_file
528
-
529
- if args.output_dir:
530
- Path(args.output_dir).mkdir(parents=True, exist_ok=True)
531
-
532
- main(args)

VA-Count-main/VA-Count-main/FSC_test.py DELETED
@@ -1,352 +0,0 @@
1
- import argparse
2
- import json
3
- import numpy as np
4
- import os
5
- from pathlib import Path
6
- from PIL import Image, ImageDraw
7
- import matplotlib.pyplot as plt
8
- import scipy.ndimage as ndimage
9
- import pandas as pd
10
- import random
11
- import torch
12
- import torch.nn as nn
13
- import torch.backends.cudnn as cudnn
14
- from torch.utils.data import Dataset
15
- import torchvision
16
- from torchvision import transforms
17
- import torchvision.transforms.functional as TF
18
- import timm
19
- from util.FSC147 import transform_train, transform_val
20
- from tqdm import tqdm
21
- assert "0.4.5" <= timm.__version__ <= "0.4.9" # version check
22
-
23
- import util.misc as misc
24
- import models_mae_cross
25
-
26
-
27
- def get_args_parser():
28
- parser = argparse.ArgumentParser('MAE pre-training', add_help=False)
29
-
30
- # Model parameters
31
- parser.add_argument('--model', default='mae_vit_base_patch16', type=str, metavar='MODEL',
32
- help='Name of model to train')
33
- parser.add_argument('--mask_ratio', default=0.5, type=float,
34
- help='Masking ratio (percentage of removed patches).')
35
- parser.add_argument('--norm_pix_loss', action='store_true',
36
- help='Use (per-patch) normalized pixels as targets for computing loss')
37
- parser.set_defaults(norm_pix_loss=False)
38
-
39
- # Dataset parameters
40
- parser.add_argument('--data_path', default='./data/FSC147/', type=str,
41
- help='dataset path')
42
- parser.add_argument('--anno_file', default='annotation_FSC147_positive.json', type=str,
43
- help='annotation json file')
44
- parser.add_argument('--anno_file_negative', default='./data/FSC147/annotation_FSC147_neg2.json', type=str,
45
- help='annotation json file')
46
- parser.add_argument('--data_split_file', default='Train_Test_Val_FSC_147.json', type=str,
47
- help='data split json file')
48
- parser.add_argument('--im_dir', default='images_384_VarV2', type=str,
49
- help='images directory')
50
- parser.add_argument('--output_dir', default='./Image',
51
- help='path where to save, empty for no saving')
52
- parser.add_argument('--device', default='cuda',
53
- help='device to use for training / testing')
54
- parser.add_argument('--seed', default=0, type=int)
55
- parser.add_argument('--resume', default='./output_fim6_dir/checkpoint-0.pth',
56
- help='resume from checkpoint')
57
- parser.add_argument('--external', action='store_true',
58
- help='Set this param for using external exemplars')
59
- parser.add_argument('--box_bound', default=-1, type=int,
60
- help='The max number of exemplars to be considered')
61
- parser.add_argument('--split', default="test", type=str)
62
-
63
- # Training parameters
64
- parser.add_argument('--num_workers', default=0, type=int)
65
- parser.add_argument('--pin_mem', action='store_true',
66
- help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
67
- parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
68
- parser.set_defaults(pin_mem=True)
69
- parser.add_argument('--normalization', default=True, help='Set to False to disable test-time normalization')
70
-
71
- # Distributed training parameters
72
- parser.add_argument('--world_size', default=1, type=int,
73
- help='number of distributed processes')
74
- parser.add_argument('--local_rank', default=-1, type=int)
75
- parser.add_argument('--dist_on_itp', action='store_true')
76
- parser.add_argument('--dist_url', default='env://',
77
- help='url used to set up distributed training')
78
-
79
- return parser
80
-
81
- os.environ["CUDA_LAUNCH_BLOCKING"] = '5'
82
-
83
- class TestData(Dataset):
84
- def __init__(self, args, split='val', do_aug=True):
85
- with open(data_path/args.anno_file) as f:
86
- annotations = json.load(f)
87
- # Load negative annotations
88
- with open(args.anno_file_negative) as f:
89
- neg_annotations = json.load(f)
90
- with open(data_path/args.data_split_file) as f:
91
- data_split = json.load(f)
92
-
93
- self.img = data_split[split]
94
- random.shuffle(self.img)
95
- self.split = split
96
- self.img_dir = im_dir
97
- # self.TransformTrain = transform_train(args, do_aug=do_aug)
98
- self.TransformVal = transform_val(args)
99
- self.annotations = annotations
100
- self.neg_annotations = neg_annotations
101
- self.im_dir = im_dir
102
-
103
- def __len__(self):
104
- return len(self.img)
105
-
106
- def __getitem__(self, idx):
107
- im_id = self.img[idx]
108
- anno = self.annotations[im_id]
109
- bboxes = anno['box_examples_coordinates']
110
- dots = np.array(anno['points'])
111
-
112
- # Load the negative-sample boxes
113
- neg_anno = self.neg_annotations[im_id] # assumes every image ID has an entry in the negative annotations
114
- neg_bboxes = neg_anno['box_examples_coordinates']
115
-
116
- rects = list()
117
- for bbox in bboxes:
118
- x1 = bbox[0][0]
119
- y1 = bbox[0][1]
120
- x2 = bbox[2][0]
121
- y2 = bbox[2][1]
122
- if x1 < 0:
123
- x1 = 0
124
- if x2 < 0:
125
- x2 = 0
126
- if y1 < 0:
127
- y1 = 0
128
- if y2 < 0:
129
- y2 = 0
130
-
131
- rects.append([y1, x1, y2, x2])
132
- neg_rects = list()
133
- for neg_bbox in neg_bboxes:
134
- x1 = neg_bbox[0][0]
135
- y1 = neg_bbox[0][1]
136
- x2 = neg_bbox[2][0]
137
- y2 = neg_bbox[2][1]
138
- if x1 < 0:
139
- x1 = 0
140
- if x2 < 0:
141
- x2 = 0
142
- if y1 < 0:
143
- y1 = 0
144
- if y2 < 0:
145
- y2 = 0
146
-
147
- neg_rects.append([y1, x1, y2, x2])
148
-
149
- image = Image.open('{}/{}'.format(self.im_dir, im_id))
150
- if image.mode == "RGBA":
151
- image = image.convert("RGB")
152
- image.load()
153
- m_flag = 0
154
-
155
- sample = {'image': image, 'lines_boxes': rects,'neg_lines_boxes': neg_rects, 'dots': dots, 'id': im_id, 'm_flag': m_flag}
156
- sample = self.TransformTrain(sample) if self.split == "train" else self.TransformVal(sample)
157
- # if self.split == "train":
158
- # sample = self.TransformTrain(sample)
159
- # # print(sample.keys())
160
- return sample['image'], sample['gt_density'], len(dots), sample['boxes'], sample['neg_boxes'], sample['pos'],sample['m_flag'], im_id
161
-
162
- def batched_rmse(predictions, targets, batch_size=100):
163
- """
164
- Batched RMSE computation
165
- :param predictions: model predictions, a PyTorch tensor
166
- :param targets: ground-truth values, a PyTorch tensor with the same shape as predictions
167
- :param batch_size: size of each batch
168
- :return: the RMSE value
169
- """
170
- total_mse = 0.0
171
- total_count = 0
172
-
173
- # Process in batches
174
- for i in range(0, len(predictions), batch_size):
175
- batch_predictions = predictions[i:i+batch_size]
176
- batch_targets = targets[i:i+batch_size]
177
-
178
- # Use float64 to improve numerical precision
179
- batch_predictions = batch_predictions.double()
180
- batch_targets = batch_targets.double()
181
-
182
- # Compute the batch MSE
183
- difference = batch_predictions - batch_targets
184
- mse = torch.mean(difference ** 2)
185
-
186
- # Accumulate the MSE and the sample count
187
- total_mse += mse * len(batch_predictions)
188
- total_count += len(batch_predictions)
189
-
190
- # Compute the mean MSE
191
- avg_mse = total_mse / total_count
192
-
193
- # Compute the RMSE
194
- rmse_val = torch.sqrt(avg_mse)
195
-
196
- return rmse_val
197
- def batched_mae(predictions, targets, batch_size=100):
198
- """
199
- Batched MAE computation
200
- :param predictions: model predictions, a PyTorch tensor
201
- :param targets: ground-truth values, a PyTorch tensor with the same shape as predictions
202
- :param batch_size: size of each batch
203
- :return: the MAE value
204
- """
205
- total_mae = 0.0
206
- total_count = 0
207
-
208
- # Process in batches
209
- for i in range(0, len(predictions), batch_size):
210
- batch_predictions = predictions[i:i+batch_size]
211
- batch_targets = targets[i:i+batch_size]
212
-
213
- # Compute the batch absolute errors
214
- absolute_errors = torch.abs(batch_predictions - batch_targets)
215
-
216
- # Accumulate the absolute errors and the sample count
217
- total_mae += torch.sum(absolute_errors)
218
- total_count += len(batch_predictions)
219
-
220
- # Compute the mean absolute error
221
- avg_mae = total_mae / total_count
222
-
223
- return avg_mae
224
-
225
- def main(args):
226
- misc.init_distributed_mode(args)
227
-
228
- print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
229
- print("{}".format(args).replace(', ', ',\n'))
230
-
231
- device = torch.device(args.device)
232
-
233
- # fix the seed for reproducibility
234
- seed = args.seed + misc.get_rank()
235
- torch.manual_seed(seed)
236
- np.random.seed(seed)
237
-
238
- cudnn.benchmark = True
239
-
240
- # dataset_test = TestData(external=args.external, box_bound=args.box_bound, split=args.split)
241
- dataset_test = TestData(args, split='test')
242
- num_tasks = misc.get_world_size()
243
- global_rank = misc.get_rank()
244
- sampler_test = torch.utils.data.DistributedSampler(
245
- dataset_test, num_replicas=num_tasks, rank=global_rank, shuffle=True
246
- )
247
-
248
- data_loader_test = torch.utils.data.DataLoader(
249
- dataset_test, sampler=sampler_test,
250
- batch_size=1,
251
- num_workers=args.num_workers,
252
- pin_memory=args.pin_mem,
253
- drop_last=False,
254
- )
255
-
256
- # define the model
257
- model = models_mae_cross.__dict__[args.model](norm_pix_loss=args.norm_pix_loss)
258
- model.to(device)
259
- model_without_ddp = model
260
-
261
- if args.distributed:
262
- model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
263
- model_without_ddp = model.module
264
-
265
- misc.load_model_FSC(args=args, model_without_ddp=model_without_ddp)
266
-
267
- print(f"Start testing.")
268
-
269
- # test
270
- model.eval()
271
-
272
- # some parameters in training
273
- train_mae = 0
274
- train_rmse = 0
275
- train_nae = 0
276
- tot_load_time = 0
277
- tot_infer_time = 0
278
-
279
- loss_array = []
280
- gt_array = []
281
- pred_arr = []
282
- name_arr = []
283
- empties = []
284
-
285
- total_mae = 0.0
286
- total_mse = 0.0
287
- total_nae = 0.0
288
- total_count = 0
289
- sub_batch_size = 50
290
- for val_samples, val_gt_density, val_n_ppl, val_boxes,neg_val_boxes, val_pos, _, val_im_names in tqdm(data_loader_test, total=len(data_loader_test), desc="Validation"):
291
- val_samples = val_samples.to(device, non_blocking=True, dtype=torch.float) # use higher precision
292
- val_gt_density = val_gt_density.to(device, non_blocking=True, dtype=torch.float)
293
- val_boxes = val_boxes.to(device, non_blocking=True, dtype=torch.float)
294
- neg_val_boxes = neg_val_boxes.to(device, non_blocking=True, dtype=torch.float)
295
- num_samples = val_samples.size(0)
296
- total_count += num_samples
297
-
298
- for i in range(0, num_samples, sub_batch_size):
299
- sub_val_samples = val_samples[i:i+sub_batch_size]
300
- sub_val_gt_density = val_gt_density[i:i+sub_batch_size]
301
-
302
- with torch.no_grad():
303
- with torch.cuda.amp.autocast():
304
- sub_val_output = model(sub_val_samples, val_boxes[i:i+sub_batch_size], 3)
305
- with torch.no_grad():
306
- with torch.cuda.amp.autocast():
307
- neg_sub_val_output = model(sub_val_samples, neg_val_boxes[i:i+sub_batch_size], 3)
308
- # output = torch.clamp((sub_val_output-neg_sub_val_output),min=0)
309
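- # Recover object counts from density-map sums; the factor of 60 matches the density-map
- # scaling used at training time (presumably inherited from CounTR).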
- sub_val_pred_cnt = torch.abs(sub_val_output.sum()) / 60
310
- # sub_val_pred_cnt = torch.abs(output.sum()) / 60
311
- # neg_sub_val_pred_cnt = torch.abs(neg_sub_val_output.sum()) / 60
312
- sub_val_gt_cnt = sub_val_gt_density.sum() / 60
313
-
314
- sub_val_cnt_err = torch.abs(sub_val_pred_cnt - sub_val_gt_cnt)
315
-
316
- # Accumulate per item, skipping inf/NaN count errors
317
- if not torch.isinf(sub_val_cnt_err) and not torch.isnan(sub_val_cnt_err):
318
- batch_mae = sub_val_cnt_err.item()
319
- batch_mse = sub_val_cnt_err.item() ** 2
320
- batch_nae = sub_val_cnt_err.item() / sub_val_gt_cnt.item() if sub_val_gt_cnt.item() != 0 else 0
321
-
322
- total_mae += batch_mae * sub_val_samples.size(0)
323
- total_mse += batch_mse * sub_val_samples.size(0)
324
- total_nae += batch_nae * sub_val_samples.size(0)
325
- sub_val_pred_cnt = (sub_val_pred_cnt).int()
326
- final_mae = total_mae / total_count
327
- final_rmse = (total_mse / total_count) ** 0.5
328
- final_nae = total_nae / total_count
329
-
330
- print(f'MAE: {final_mae}, RMSE: {final_rmse}, NAE: {final_nae}')
331
-
332
-
333
-
334
- if __name__ == '__main__':
335
- args = get_args_parser()
336
- args = args.parse_args()
337
-
338
- # load data
339
- data_path = Path(args.data_path)
340
- anno_file = data_path / args.anno_file
341
- data_split_file = data_path / args.data_split_file
342
- im_dir = data_path / args.im_dir
343
-
344
- with open(anno_file) as f:
345
- annotations = json.load(f)
346
-
347
- with open(data_split_file) as f:
348
- data_split = json.load(f)
349
-
350
- if args.output_dir:
351
- Path(args.output_dir).mkdir(parents=True, exist_ok=True)
352
- main(args)

VA-Count-main/VA-Count-main/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2022 Chang Liu
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.

VA-Count-main/VA-Count-main/README.md DELETED
@@ -1,100 +0,0 @@
1
- # VA-Count
2
- [ECCV 2024] Zero-shot Object Counting with Good Exemplars
3
- [[paper](https://arxiv.org/abs/2407.04948)]
4
- ![figure](figure.png)
5
- # Zero-shot Object Counting with Good Exemplars
6
- ## News🚀
7
- * **2024.09.27**: Our code is released.
8
- * **2024.09.26**: Our inference code has been updated; the exemplar-selection code and training code are coming soon.
9
- * **2024.07.02**: VA-Count is accepted by ECCV2024.
10
- ## Overview
11
- Overview of the proposed method. The method has two main elements: the Exemplar Enhancement Module (EEM), which improves exemplar quality through patch selection integrated with Grounding DINO, and the Noise Suppression Module (NSM), which distinguishes positive from negative class samples using density maps. A contrastive loss sharpens the separation between target-class objects and everything else in the image (a minimal sketch of this loss is given below).
12
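The noise-suppression objective can be read off the training script removed in this commit (`FSC_tain.py`): density maps are predicted once from positive exemplars and once from negative exemplars, each is compared against the ground-truth density with a randomly masked MSE, and a hinge term keeps the positive loss below the negative loss by a margin of 0.5. Below is a minimal sketch of that loss; the function name and the per-pixel mask sampling are illustrative simplifications (the original draws one Bernoulli(0.8) mask with NumPy and tiles it across the batch), while `pos_output`, `neg_output`, `gt_density`, and the margin follow the original code.

```python
import torch

def nsm_contrastive_loss(pos_output, neg_output, gt_density, margin=0.5, keep_prob=0.8):
    """Sketch of the NSM loss distilled from FSC_tain.py (not the verbatim implementation).

    pos_output / neg_output: density maps predicted from positive / negative exemplars, (B, 384, 384).
    gt_density: ground-truth density maps of the same shape.
    """
    # Masked MSE: only ~80% of the pixels, drawn at random, contribute to each loss term.
    mask = (torch.rand_like(gt_density) < keep_prob).float()
    pos_loss = ((pos_output - gt_density) ** 2 * mask / (384 * 384)).sum() / pos_output.shape[0]
    neg_loss = ((neg_output - gt_density) ** 2 * mask / (384 * 384)).sum() / neg_output.shape[0]

    # Hinge-style contrastive term: the positive loss should undercut the negative loss by `margin`.
    contrastive = torch.relu(pos_loss - neg_loss + margin)

    # Total objective backpropagated in the original training loop.
    return contrastive + pos_loss
```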
- ## Environment
13
- ```
14
- pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
15
- pip install timm==0.3.2
16
- pip install numpy
17
- pip install matplotlib tqdm
18
- pip install tensorboard
19
- pip install scipy
20
- pip install imgaug
21
- pip install opencv-python
22
- pip3 install hub
23
- ```
24
- ### For more information on Grounding DINO, please refer to the following link:
25
- [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO)
26
- We are very grateful for the Grounding DINO approach, which has been instrumental in our work!
27
-
28
- ## Datasets
29
-
30
- * [FSC147](https://github.com/cvlab-stonybrook/LearningToCountEverything)
31
-
32
- * [CARPK](https://lafi.github.io/LPN/)
33
-
34
- Preparing the datasets as follows:
35
-
36
- ```
37
- ./data/
38
- |--FSC147
39
- | |--images_384_VarV2
40
- | | |--2.jpg
41
- | | |--3.jpg
42
- | |--gt_density_map_adaptive_384_VarV2
43
- | | |--2.npy
44
- | | |--3.npy
45
- | |--annotation_FSC147_384.json
46
- | |--Train_Test_Val_FSC_147.json
47
- | |--ImageClasses_FSC147.txt
48
- | |--train.txt
49
- | |--test.txt
50
- | |--val.txt
51
- |--CARPK/
52
- | |--Annotations/
53
- | |--Images/
54
- | |--ImageSets/
55
- ```
56
- ## Inference
57
- + For inference, you can download the model from [Baidu-Disk](https://pan.baidu.com/s/11sbdDYLDfTOIPx5pZvBpmw?pwd=paeh), password: paeh
58
- ```
59
- python FSC_test.py --output_dir ./data/out/results_base --resume ./data/checkpoint_FSC.pth
60
- ```
61
- ## Single and Multiple Object Classifier Training
62
- ```
63
- python datasetmake.py
64
- python biclassify.py
65
- ```
66
- + You can also directly download the model from [Baidu-Disk](https://pan.baidu.com/s/1fOF0giI3yQpvGTiNFUI7cQ?pwd=psum), password: psum. Save it in ./data/out/classify/
67
- ## Generate exemplars
68
- ```
69
- python grounding_pos.py --root_path ./data/FSC147/
70
- python grounding_neg.py --root_path ./data/FSC147/
71
- ```
72
-
73
- ## Train
74
-
75
- ```
76
- CUDA_VISIBLE_DEVICES=0 python FSC_pretrain.py \
77
- --epochs 500 \
78
- --warmup_epochs 10 \
79
- --blr 1.5e-4 --weight_decay 0.05
80
- ```
81
- + You can also directly download the pre-trained model from [Baidu-Disk](https://pan.baidu.com/s/1_-w_9I4bPA66pMZkHTrdrg?pwd=xynw), password: xynw. Save it in ./data/
82
- ```
83
- CUDA_VISIBLE_DEVICES=0 python FSC_train.py --epochs 1000 --batch_size 8 --lr 1e-5 --output_dir ./data/out/
84
- ```
85
-
86
- ## Citation
87
-
88
- ```
89
- @inproceedings{zhu2024zero,
90
- title={Zero-shot Object Counting with Good Exemplars},
91
- author={Zhu, Huilin and Yuan, Jingling and Yang, Zhengwei and Guo, Yu and Wang, Zheng and Zhong, Xian and He, Shengfeng},
92
- booktitle={Proceedings of the European Conference on Computer Vision},
93
- year={2024}
94
- }
95
- ```
96
-
97
- ## Acknowledgement
98
- This project is based on the implementation from [CounTR](https://github.com/Verg-Avesta/CounTR); we are very grateful for that work and for [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO).
99
-
100
- #### If you have any questions, please get in touch with me ([email protected]).
 
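Note on the README Overview above: it describes the Noise Suppression Module as contrasting density maps obtained from positive and negative exemplars. Below is a minimal illustrative sketch of that idea only, assuming a simple formulation in which the positive-exemplar prediction should match the ground-truth density while the negative-exemplar prediction is pushed toward zero; the exact loss used by the repository's training script may differ.

```python
# Illustrative sketch only -- not the repository's exact NSM loss.
import torch
import torch.nn.functional as F

def exemplar_contrast_loss(pred_pos, pred_neg, gt_density):
    """pred_pos / pred_neg: [N, H, W] density maps predicted with positive / negative
    exemplars; gt_density: [N, H, W] ground-truth density map."""
    pos_loss = F.mse_loss(pred_pos, gt_density)  # positive exemplars should reproduce the GT density
    neg_loss = pred_neg.pow(2).mean()            # negative exemplars should produce (near-)zero density
    return pos_loss + neg_loss

if __name__ == "__main__":
    n, h, w = 2, 384, 384
    print(exemplar_contrast_loss(torch.rand(n, h, w), torch.rand(n, h, w), torch.rand(n, h, w)).item())
```
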
VA-Count-main/VA-Count-main/__pycache__/models_crossvit.cpython-38.pyc DELETED
Binary file (6.28 kB)
 
VA-Count-main/VA-Count-main/__pycache__/models_mae_cross.cpython-38.pyc DELETED
Binary file (6.69 kB)
 
VA-Count-main/VA-Count-main/__pycache__/models_mae_noct.cpython-38.pyc DELETED
Binary file (7.03 kB)
 
VA-Count-main/VA-Count-main/__pycache__/models_mae_noct.cpython-39.pyc DELETED
Binary file (6.96 kB)
 
VA-Count-main/VA-Count-main/biclassify.py DELETED
@@ -1,163 +0,0 @@
1
- import pandas as pd
2
- import os
3
- import torch
4
- from torch.utils.data import Dataset, DataLoader
5
- from torchvision.transforms import Compose, Resize, Normalize, ToTensor
6
- from PIL import Image
7
- import torch.nn as nn
8
- import torch.nn.functional as F
9
- from sklearn.model_selection import train_test_split
10
- import clip
11
- import re
12
- import torchvision.models as models
13
- # 1. Read data and preprocess
14
- def read_label_file(file_path):
15
- data = []
16
- with open(file_path, 'r') as f:
17
- for line in f.readlines():
18
- image_name, label = line.strip().split(',')
19
- data.append([image_name, 1 if label == 'one' else 0])
20
- return pd.DataFrame(data, columns=['image', 'label'])
21
- # Read the image names listed in train.txt
22
- with open('./data/FSC147/train.txt', 'r') as file:
23
- a_txt_images = file.read().splitlines()
24
-
25
- # Extract the numeric ID before .jpg
26
- a_txt_numbers = set([name.split('.')[0] for name in a_txt_images])
27
-
28
- # Read image names and labels from labels.txt
29
- with open('./data/FSC147/one/labels.txt', 'r') as file:
30
- label_txt_lines = file.read().splitlines()
31
-
32
- # Keep only the images that also appear in train.txt
33
- filtered_images = []
34
- for line in label_txt_lines:
35
- image_name, label = line.strip().split(',')
36
- # Use a regular expression to match the leading digits
37
- match = re.match(r'(\d+)', image_name)
38
- if match:
39
- image_number = match.group(1)
40
- if image_number in a_txt_numbers:
41
- # Convert the 'label' value
42
- label_value = 1 if label == 'one' else 0
43
- filtered_images.append([image_name, label_value]) # a list, to match the output of read_label_file
44
-
45
- # Convert the filtered images and labels to a DataFrame whose columns match the output of read_label_file
46
- df_filtered = pd.DataFrame(filtered_images, columns=['image', 'label'])
47
-
48
- # Custom Dataset class
49
- class CustomDataset(Dataset):
50
- def __init__(self, dataframe, root_dir, transform=None):
51
- self.dataframe = dataframe
52
- self.root_dir = root_dir
53
- self.transform = transform
54
-
55
- def __len__(self):
56
- return len(self.dataframe)
57
-
58
- def __getitem__(self, idx):
59
- img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 0])
60
- image = Image.open(img_name).convert('RGB')
61
- label = self.dataframe.iloc[idx, 1]
62
- if self.transform:
63
- image = self.transform(image)
64
- return image, label
65
-
66
- # 2. Dataset split
67
- data_folder = './data/FSC147/one'
68
- label_file = os.path.join(data_folder, 'labels.txt')
69
- # df = read_label_file(label_file)
70
- df = df_filtered
71
- train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
72
-
73
- # 3. Data loading
74
- transform = Compose([
75
- Resize((224, 224)),
76
- ToTensor(),
77
- Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
78
- ])
79
-
80
- train_dataset = CustomDataset(train_df, data_folder, transform=transform)
81
- test_dataset = CustomDataset(test_df, data_folder, transform=transform)
82
-
83
- train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
84
- test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
85
-
86
- # 4. Model definition
87
- class ClipClassifier(nn.Module):
88
- def __init__(self, clip_model, embed_dim=512):
89
- super(ClipClassifier, self).__init__()
90
- self.clip_model = clip_model
91
- # Freeze the CLIP model parameters
92
- for param in self.clip_model.parameters():
93
- param.requires_grad = False
94
- self.fc = nn.Linear(clip_model.visual.output_dim, embed_dim)
95
- self.classifier = nn.Linear(embed_dim, 2) # binary classification
96
-
97
- def forward(self, images):
98
- with torch.no_grad():
99
- image_features = self.clip_model.encode_image(images).float()
100
- x = self.fc(image_features)
101
- x = F.relu(x)
102
- logits = self.classifier(x)
103
- return logits
104
- class ResNetClassifier(nn.Module):
105
- def __init__(self, num_classes=2):
106
- super(ResNetClassifier, self).__init__()
107
- # Load a pretrained ResNet-50
108
- self.resnet50 = models.resnet50(pretrained=True)
109
- # Freeze all pretrained layers
110
- for param in self.resnet50.parameters():
111
- param.requires_grad = False
112
- # Replace the final fully connected layer for the binary classification task
113
- num_ftrs = self.resnet50.fc.in_features
114
- self.resnet50.fc = nn.Linear(num_ftrs, num_classes)
115
-
116
- def forward(self, images):
117
- return self.resnet50(images)
118
-
119
- # 5. Training and testing
120
- device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")
121
- clip_model, _ = clip.load("ViT-B/32", device=device)
122
- # model = ClipClassifier(clip_model).to(device)
123
- model = ResNetClassifier().to(device)
124
-
125
- optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
126
- criterion = nn.CrossEntropyLoss()
127
-
128
- def train(model, device, train_loader, optimizer, epoch):
129
- model.train()
130
- for batch_idx, (data, target) in enumerate(train_loader):
131
- data, target = data.to(device), target.to(device)
132
- optimizer.zero_grad()
133
- output = model(data)
134
- loss = criterion(output, target)
135
- loss.backward()
136
- optimizer.step()
137
- if batch_idx % 10 == 0:
138
- print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')
139
-
140
- def test(model, device, test_loader):
141
- model.eval()
142
- test_loss = 0
143
- correct = 0
144
- with torch.no_grad():
145
- for data, target in test_loader:
146
- data, target = data.to(device), target.to(device)
147
- output = model(data)
148
- test_loss += criterion(output, target).item()
149
- pred = output.argmax(dim=1, keepdim=True)
150
- correct += pred.eq(target.view_as(pred)).sum().item()
151
- test_loss /= len(test_loader.dataset)
152
- print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')
153
- return 100. * correct / len(test_loader.dataset)
154
-
155
- best_accuracy = 0.0
156
- for epoch in range(1, 11):
157
- train(model, device, train_loader, optimizer, epoch)
158
- accuracy = test(model, device, test_loader)
159
- if accuracy > best_accuracy:
160
- best_accuracy = accuracy
161
- torch.save(model.state_dict(), './data/out/classify/best_model.pth')
162
- print(f'Best model saved with accuracy: {best_accuracy:.2f}%')
163
-
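For reference, a minimal sketch of how the checkpoint saved by the script above (./data/out/classify/best_model.pth) could be applied to a single crop at inference time, assuming the ResNetClassifier variant is the one that was trained (note that grounding_pos.py and grounding_neg.py below instead load these weights into the ClipClassifier); example_crop.jpg is a hypothetical path used only for illustration.

```python
# Sketch: score one crop with the saved single-vs-multiple-object classifier (ResNet variant assumed).
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from PIL import Image

class ResNetClassifier(nn.Module):  # abridged from biclassify.py above
    def __init__(self, num_classes=2):
        super().__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        self.resnet50.fc = nn.Linear(self.resnet50.fc.in_features, num_classes)

    def forward(self, images):
        return self.resnet50(images)

transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
])

model = ResNetClassifier()
model.load_state_dict(torch.load('./data/out/classify/best_model.pth', map_location='cpu'))
model.eval()

crop = transform(Image.open('example_crop.jpg').convert('RGB')).unsqueeze(0)  # hypothetical crop
with torch.no_grad():
    prob_single = torch.softmax(model(crop), dim=1)[0, 1].item()  # class 1 = 'one' (single object)
print(prob_single)
```
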
VA-Count-main/VA-Count-main/datasetmake.py DELETED
@@ -1,53 +0,0 @@
1
- from PIL import Image
2
- import os
3
- import random
4
-
5
- def is_image_file(filename):
6
- """Check whether a file is an image file."""
7
- image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif'] # supported image file extensions
8
- return any(filename.lower().endswith(ext) for ext in image_extensions)
9
-
10
- def random_crop(img, size=(256, 256)):
11
- """Randomly crop a region of the given size from the image."""
12
- width, height = img.size
13
- crop_width, crop_height = size
14
-
15
- if width < crop_width or height < crop_height:
16
- return None # return None if the image is smaller than the crop size
17
-
18
- x_left = random.randint(0, width - crop_width)
19
- y_upper = random.randint(0, height - crop_height)
20
-
21
- return img.crop((x_left, y_upper, x_left + crop_width, y_upper + crop_height))
22
-
23
- # Folder paths (adjust to your setup)
24
- single_object_folder = './data/FSC147/box'
25
- multiple_objects_folder = './data/FSC147/images_384_VarV2'
26
- output_folder = './data/FSC147/one'
27
-
28
- # Make sure the output folder exists
29
- if not os.path.exists(output_folder):
30
- os.makedirs(output_folder)
31
-
32
- output_txt_path = os.path.join(output_folder, 'labels.txt')
33
- with open(output_txt_path, 'w') as f:
34
- for folder, label in [(single_object_folder, 'one'), (multiple_objects_folder, 'more')]:
35
- for filename in os.listdir(folder):
36
- if is_image_file(filename): # only process image files
37
- img_path = os.path.join(folder, filename)
38
- img = Image.open(img_path)
39
-
40
- # Save the original image and record it in the txt file
41
- original_img_output_path = os.path.join(output_folder, filename)
42
- img.save(original_img_output_path)
43
- f.write(f"{filename},{label}\n")
44
-
45
- # Randomly crop the original image and save the crops
46
- for size in [(256, 384), (256, 256), (384, 384),(128,256),(256,128)]:
47
- img_cropped = random_crop(img, size=size)
48
- if img_cropped:
49
- cropped_img_output_path = os.path.join(output_folder, f"{filename[:-4]}_random_{size[0]}x{size[1]}.jpg")
50
- img_cropped.save(cropped_img_output_path)
51
- f.write(f"{filename[:-4]}_random_{size[0]}x{size[1]}.jpg,{label}\n")
52
-
53
- print("Dataset preparation complete.")
 
VA-Count-main/VA-Count-main/figure.png DELETED
Binary file (418 kB)
 
VA-Count-main/VA-Count-main/grounding_neg.py DELETED
@@ -1,188 +0,0 @@
1
- import torch
2
- import os
3
- import inflect
4
- import argparse
5
- from GroundingDINO.groundingdino.util.inference import load_model, load_image, predict
6
- from PIL import Image
7
- import numpy as np
8
- from torchvision.ops import box_convert
9
- import json
10
- import torch.nn as nn
11
- import torch.nn.functional as F
12
- import clip
13
-
14
- # Global variables
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
-
17
- # Threshold settings
18
- BOX_THRESHOLD = 0.02
19
- TEXT_THRESHOLD = 0.02
20
- BOX_THRESHOLD_class = 0.01
21
- TEXT_THRESHOLD_class = 0.01
22
-
23
- # Initialize the inflect engine
24
- p = inflect.engine()
25
-
26
- # Convert a word to its singular form
27
- def to_singular(word):
28
- singular_word = p.singular_noun(word)
29
- return singular_word if singular_word else word
30
-
31
- # ClipClassifier definition
32
- class ClipClassifier(nn.Module):
33
- def __init__(self, clip_model, embed_dim=512):
34
- super(ClipClassifier, self).__init__()
35
- self.clip_model = clip_model.to(device)
36
- for param in self.clip_model.parameters():
37
- param.requires_grad = False
38
- self.fc = nn.Linear(clip_model.visual.output_dim, embed_dim)
39
- self.classifier = nn.Linear(embed_dim, 2) # binary classification
40
-
41
- def forward(self, images):
42
- with torch.no_grad():
43
- image_features = self.clip_model.encode_image(images).float().to(device)
44
- x = self.fc(image_features)
45
- x = F.relu(x)
46
- logits = self.classifier(x)
47
- return logits
48
-
49
- # Initialize and load the binary classifier
50
- clip_model, preprocess = clip.load("ViT-B/32", device)
51
- binary_classifier = ClipClassifier(clip_model).to(device)
52
-
53
- # Load the saved weights
54
- model_weights_path = './data/out/classify/best_model.pth'
55
- binary_classifier.load_state_dict(torch.load(model_weights_path, map_location=device))
56
-
57
- # Make sure the model is in evaluation mode
58
- binary_classifier.eval()
59
-
60
- # Compute the IoU of two bounding boxes
61
- def calculate_iou(box1, box2):
62
- x1, y1, w1, h1 = box1
63
- x2, y2, w2, h2 = box2
64
-
65
- intersection_x1 = max(x1, x2)
66
- intersection_y1 = max(y1, y2)
67
- intersection_x2 = min(x1 + w1, x2 + w2)
68
- intersection_y2 = min(y1 + h1, y2 + h2)
69
-
70
- intersection_area = max(intersection_x2 - intersection_x1, 0) * max(intersection_y2 - intersection_y1, 0)
71
- box1_area = w1 * h1
72
- box2_area = w2 * h2
73
- union_area = box1_area + box2_area - intersection_area
74
- iou = intersection_area / union_area if union_area > 0 else 0
75
-
76
- return iou
77
-
78
- # Check whether a patch is valid
79
- def is_valid_patch(patch, binary_classifier, preprocess, device):
80
- if patch.size[0] <= 0 or patch.size[1] <= 0:
81
- return False
82
-
83
- patch_tensor = preprocess(patch).unsqueeze(0).to(device)
84
- with torch.no_grad():
85
- logits = binary_classifier(patch_tensor)
86
- probabilities = torch.softmax(logits, dim=1)
87
- prob_label_1 = probabilities[0, 1]
88
- return prob_label_1.item() > 0.8
89
-
90
- # Main image-processing function
91
- def process_images(text_file_path, dataset_path, model, preprocess, binary_classifier, output_folder, device='cpu'):
92
- boxes_dict = {}
93
-
94
- with open(text_file_path, 'r') as f:
95
- for line in f:
96
- image_name, class_name = line.strip().split('\t')
97
- print(f"Processing image: {image_name}")
98
- text_prompt = class_name + ' .'
99
- object_prompt = "object ."
100
- image_path = os.path.join(dataset_path, image_name)
101
- img = Image.open(image_path).convert("RGB")
102
- image_source, image = load_image(image_path)
103
- h, w, _ = image_source.shape
104
- boxes_object, logits_object, _ = predict(model, image, object_prompt, BOX_THRESHOLD, TEXT_THRESHOLD)
105
- boxes_class, logits_class, _ = predict(model, image, text_prompt, BOX_THRESHOLD_class, TEXT_THRESHOLD_class)
106
-
107
- patches_object = box_convert(boxes_object, in_fmt="cxcywh", out_fmt="xyxy")
108
- patches_class = box_convert(boxes_class, in_fmt="cxcywh", out_fmt="xyxy")
109
-
110
- top_patches = []
111
- iou_matrix = np.zeros((len(boxes_object), len(boxes_class)))
112
-
113
- for j, box_class in enumerate(patches_class):
114
- box_object_class = box_class.cpu().numpy() * np.array([w, h, w, h], dtype=np.float32)
115
- x1_, y1_, x2_, y2_ = box_object_class.astype(int)
116
- x1_, y1_, x2_, y2_ = max(x1_, 0), max(y1_, 0), min(x2_, w), min(y2_, h)
117
- patch_ = img.crop((x1_, y1_, x2_, y2_))
118
- if x2_ - x1_ > w / 2 or y2_ - y1_ > h / 2 or not is_valid_patch(patch_, binary_classifier, preprocess, device):
119
- print(f"Skipping patch at box {box_class}")
120
- continue
121
- for i, box_object in enumerate(patches_object):
122
- iou_matrix[i][j] = calculate_iou(box_object.cpu().numpy(), box_class.cpu().numpy())
123
-
124
- for i, box_object in enumerate(patches_object):
125
- max_iou = np.max(iou_matrix[i])
126
- if max_iou < 0.5:
127
- box_object = box_object.cpu().numpy() * np.array([w, h, w, h], dtype=np.float32)
128
- x1, y1, x2, y2 = box_object.astype(int)
129
- x1, y1, x2, y2 = max(x1, 0), max(y1, 0), min(x2, w), min(y2, h)
130
- patch = img.crop((x1, y1, x2, y2))
131
- if patch.size == (0, 0) or not is_valid_patch(patch, binary_classifier, preprocess, device) or x2 - x1 > w / 2 or y2 - y1 > h / 2 or y2 - y1 < 5 or x2 - x1 < 5:
132
- print(f"Skipping patch at box {box_object}")
133
- continue
134
- patch_logits = logits_object[i]
135
- top_patches.append((i, patch_logits.item()))
136
-
137
- top_patches.sort(key=lambda x: x[1], reverse=True)
138
- top_3_indices = [patch[0] for patch in top_patches[:3]]
139
-
140
- while len(top_3_indices) < 3:
141
- if len(top_3_indices) > 0:
142
- top_3_indices.append(top_3_indices[-1])
143
- else:
144
- default_box = torch.tensor([0,0,20/w,20/h]).unsqueeze(0)
145
- patches_object = torch.cat((patches_object, default_box.to(boxes_object.device)), dim=0)
146
- top_3_indices.append(len(patches_object) - 1)
147
-
148
- boxes_dict[image_name] = [patches_object[idx].cpu().numpy().tolist() * np.array([w, h, w, h], dtype=np.float32) for idx in top_3_indices]
149
-
150
- return boxes_dict
151
-
152
- def main(args):
153
- # Fixed default paths
154
- model_config = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
155
- model_weights = "GroundingDINO/weights/groundingdino_swint_ogc.pth"
156
-
157
- # Paths derived from root_path
158
- text_file_path = os.path.join(args.root_path, "ImageClasses_FSC147.txt")
159
- dataset_path = os.path.join(args.root_path, "images_384_VarV2")
160
- input_json_path = os.path.join(args.root_path, "annotation_FSC147_384.json")
161
- output_json_path = os.path.join(args.root_path, "annotation_FSC147_neg.json")
162
- output_folder = os.path.join(args.root_path, "annotated_images_n")
163
-
164
- os.makedirs(output_folder, exist_ok=True)
165
-
166
- # Load the GroundingDINO model
167
- model = load_model(model_config, model_weights, device=device)
168
-
169
- # Process images and generate bounding boxes
170
- boxes_dict = process_images(text_file_path, dataset_path, model, preprocess, binary_classifier, output_folder, device=device)
171
-
172
- # Update the JSON file
173
- with open(input_json_path, 'r') as f:
174
- data = json.load(f)
175
-
176
- for image_name, boxes in boxes_dict.items():
177
- if image_name in data:
178
- new_boxes = [[[x1, y1], [x1, y2], [x2, y2], [x2, y1]] for x1, y1, x2, y2 in boxes]
179
- data[image_name]["box_examples_coordinates"] = new_boxes
180
-
181
- with open(output_json_path, 'w') as f:
182
- json.dump(data, f, indent=4)
183
-
184
- if __name__ == "__main__":
185
- parser = argparse.ArgumentParser(description="Image Processing Script")
186
- parser.add_argument("--root_path", type=str, required=True, help="Root path to the dataset and output files")
187
- args = parser.parse_args()
188
- main(args)
 
VA-Count-main/VA-Count-main/grounding_pos.py DELETED
@@ -1,141 +0,0 @@
1
- import torch
2
- import os
3
- import clip
4
- import inflect
5
- import argparse
6
- from torchvision.ops import box_convert
7
- from GroundingDINO.groundingdino.util.inference import load_model, load_image, predict
8
- from PIL import Image
9
- import numpy as np
10
- import json
11
- import torch.nn as nn
12
- import torch.nn.functional as F
13
-
14
- # Global variables
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- BOX_THRESHOLD = 0.05
17
- TEXT_THRESHOLD = 0.05
18
-
19
- # Initialize the inflect engine
20
- p = inflect.engine()
21
-
22
- # ClipClassifier definition
23
- class ClipClassifier(nn.Module):
24
- def __init__(self, clip_model, embed_dim=512):
25
- super(ClipClassifier, self).__init__()
26
- self.clip_model = clip_model.to(device)
27
- for param in self.clip_model.parameters():
28
- param.requires_grad = False
29
- self.fc = nn.Linear(clip_model.visual.output_dim, embed_dim)
30
- self.classifier = nn.Linear(embed_dim, 2) # binary classification
31
-
32
- def forward(self, images):
33
- with torch.no_grad():
34
- image_features = self.clip_model.encode_image(images).float().to(device)
35
- x = self.fc(image_features)
36
- x = F.relu(x)
37
- logits = self.classifier(x)
38
- return logits
39
-
40
- # Load the CLIP model
41
- clip_model, preprocess = clip.load("ViT-B/32", device)
42
- clip_model.eval()
43
-
44
- # Initialize and load the binary classifier
45
- binary_classifier = ClipClassifier(clip_model).to(device)
46
- model_weights_path = './data/out/classify/best_model.pth'
47
- binary_classifier.load_state_dict(torch.load(model_weights_path, map_location=device))
48
- binary_classifier.eval()
49
-
50
- # Check whether a patch is valid
51
- def is_valid_patch(patch, binary_classifier, preprocess, device):
52
- if patch.size[0] <= 0 or patch.size[1] <= 0:
53
- return False
54
- patch_tensor = preprocess(patch).unsqueeze(0).to(device)
55
- with torch.no_grad():
56
- logits = binary_classifier(patch_tensor)
57
- probabilities = torch.softmax(logits, dim=1)
58
- prob_label_1 = probabilities[0, 1]
59
- return prob_label_1.item() > 0.8
60
-
61
- # Main image-processing function
62
- def process_images(text_file_path, dataset_path, model, preprocess, clip_model, output_folder, device='cpu'):
63
- boxes_dict = {}
64
- with open(text_file_path, 'r') as f:
65
- for line in f:
66
- image_name, class_name = line.strip().split('\t')
67
- print(f"Processing image: {image_name}")
68
- text_prompt = class_name + ' .'
69
- image_path = os.path.join(dataset_path, image_name)
70
- img = Image.open(image_path).convert("RGB")
71
- image_source, image = load_image(image_path)
72
- h, w, _ = image_source.shape
73
- boxes, logits, _ = predict(model, image, text_prompt, BOX_THRESHOLD, TEXT_THRESHOLD)
74
- patches = box_convert(boxes, in_fmt="cxcywh", out_fmt="xyxy")
75
-
76
- top_patches = []
77
- for i, (box, logit) in enumerate(zip(patches, logits)):
78
- box = box.cpu().numpy() * np.array([w, h, w, h], dtype=np.float32)
79
- x1, y1, x2, y2 = box.astype(int)
80
- x1, y1, x2, y2 = max(x1, 0), max(y1, 0), min(x2, w), min(y2, h)
81
- patch = img.crop((x1, y1, x2, y2))
82
-
83
- if patch.size == (0, 0) or not is_valid_patch(patch, binary_classifier, preprocess, device) or x2 - x1 > w / 2 or y2 - y1 > h / 2 or y2 - y1 < 5 or x2 - x1 < 5:
84
- print(f"Skipping patch due to binary classifier at box {box}")
85
- continue
86
- top_patches.append((i, logit))
87
-
88
- top_patches.sort(key=lambda x: x[1], reverse=True)
89
- top_3_indices = [patch[0] for patch in top_patches[:3]]
90
-
91
- # Make sure every image has three bounding boxes
92
- while len(top_3_indices) < 3:
93
- if len(top_3_indices) > 0:
94
- top_3_indices.append(top_3_indices[-1])
95
- else:
96
- default_box = torch.tensor([0, 0, 20 / w, 20 / h]).unsqueeze(0)
97
- patches = torch.cat((patches, default_box.to(boxes.device)), dim=0)
98
- top_3_indices.append(len(patches) - 1)
99
-
100
- boxes_dict[image_name] = [patches[idx].cpu().numpy().tolist() * np.array([w, h, w, h], dtype=np.float32) for idx in top_3_indices]
101
-
102
- return boxes_dict
103
-
104
- # Main function
105
- def main(args):
106
- # Fixed default paths
107
- model_config = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
108
- model_weights = "GroundingDINO/weights/groundingdino_swint_ogc.pth"
109
- output_folder = os.path.join(args.root_path, "annotated_images")
110
-
111
- # Paths derived from root_path
112
- text_file_path = os.path.join(args.root_path, "ImageClasses_FSC147.txt")
113
- dataset_path = os.path.join(args.root_path, "images_384_VarV2")
114
- input_json_path = os.path.join(args.root_path, "annotation_FSC147_384_old.json")
115
- output_json_path = os.path.join(args.root_path, "annotation_FSC147_pos.json")
116
-
117
- os.makedirs(output_folder, exist_ok=True)
118
-
119
- # Load the GroundingDINO model
120
- model = load_model(model_config, model_weights, device=device)
121
-
122
- # Process images and generate bounding boxes
123
- boxes_dict = process_images(text_file_path, dataset_path, model, preprocess, clip_model, output_folder, device=device)
124
-
125
- # Update the JSON file
126
- with open(input_json_path, 'r') as f:
127
- data = json.load(f)
128
-
129
- for image_name, boxes in boxes_dict.items():
130
- if image_name in data:
131
- new_boxes = [[[x1, y1], [x1, y2], [x2, y2], [x2, y1]] for x1, y1, x2, y2 in boxes]
132
- data[image_name]["box_examples_coordinates"] = new_boxes
133
-
134
- with open(output_json_path, 'w') as f:
135
- json.dump(data, f, indent=4)
136
-
137
- if __name__ == "__main__":
138
- parser = argparse.ArgumentParser(description="Image Processing Script")
139
- parser.add_argument("--root_path", type=str, required=True, help="Root path to the dataset and output files")
140
- args = parser.parse_args()
141
- main(args)
 
VA-Count-main/VA-Count-main/models_crossvit.py DELETED
@@ -1,155 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- import torch.hub
5
- from itertools import repeat
6
- import collections.abc
7
-
8
-
9
- def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):
10
- """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
11
- This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
12
- the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
13
- See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
14
- changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
15
- 'survival rate' as the argument.
16
- """
17
- if drop_prob == 0. or not training:
18
- return x
19
- keep_prob = 1 - drop_prob
20
- shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
21
- random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
22
- if keep_prob > 0.0 and scale_by_keep:
23
- random_tensor.div_(keep_prob)
24
- return x * random_tensor
25
-
26
- class DropPath(nn.Module):
27
- """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
28
- """
29
- def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
30
- super(DropPath, self).__init__()
31
- self.drop_prob = drop_prob
32
- self.scale_by_keep = scale_by_keep
33
-
34
- def forward(self, x):
35
- return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
36
-
37
- def _ntuple(n):
38
- def parse(x):
39
- if isinstance(x, collections.abc.Iterable):
40
- return x
41
- return tuple(repeat(x, n))
42
- return parse
43
-
44
- to_2tuple = _ntuple(2)
45
-
46
- class Mlp(nn.Module):
47
- """ MLP as used in Vision Transformer, MLP-Mixer and related networks
48
- """
49
- def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
50
- super().__init__()
51
- out_features = out_features or in_features
52
- hidden_features = hidden_features or in_features
53
- drop_probs = to_2tuple(drop)
54
-
55
- self.fc1 = nn.Linear(in_features, hidden_features)
56
- self.act = act_layer()
57
- self.drop1 = nn.Dropout(drop_probs[0])
58
- self.fc2 = nn.Linear(hidden_features, out_features)
59
- self.drop2 = nn.Dropout(drop_probs[1])
60
-
61
- def forward(self, x):
62
- x = self.fc1(x)
63
- x = self.act(x)
64
- x = self.drop1(x)
65
- x = self.fc2(x)
66
- x = self.drop2(x)
67
- return x
68
-
69
- class Attention(nn.Module):
70
- def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
71
- super().__init__()
72
- self.num_heads = num_heads
73
- head_dim = dim // num_heads
74
- # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
75
- self.scale = qk_scale or head_dim ** -0.5
76
-
77
- self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
78
- self.attn_drop = nn.Dropout(attn_drop)
79
- self.proj = nn.Linear(dim, dim)
80
- self.proj_drop = nn.Dropout(proj_drop)
81
-
82
- def forward(self, x):
83
- B, N, C = x.shape
84
- qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
85
- q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
86
-
87
- attn = (q @ k.transpose(-2, -1)) * self.scale
88
- attn = attn.softmax(dim=-1)
89
- attn = self.attn_drop(attn)
90
-
91
- x = (attn @ v).transpose(1, 2).reshape(B, N, C)
92
- x = self.proj(x)
93
- x = self.proj_drop(x)
94
- return x
95
-
96
- class CrossAttention(nn.Module):
97
- def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
98
- super().__init__()
99
- self.num_heads = num_heads
100
- head_dim = dim // num_heads
101
- # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
102
- self.scale = qk_scale or head_dim ** -0.5
103
- self.wq = nn.Linear(dim, dim, bias=qkv_bias)
104
- self.wk = nn.Linear(dim, dim, bias=qkv_bias)
105
- self.wv = nn.Linear(dim, dim, bias=qkv_bias)
106
- self.attn_drop = nn.Dropout(attn_drop)
107
- self.proj = nn.Linear(dim, dim)
108
- self.proj_drop = nn.Dropout(proj_drop)
109
-
110
- def forward(self, x, y):
111
- B, Nx, C = x.shape
112
- Ny = y.shape[1]
113
- # BNxC -> BNxH(C/H) -> BHNx(C/H)
114
- q = self.wq(x).reshape(B, Nx, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
115
- # BNyC -> BNyH(C/H) -> BHNy(C/H)
116
- k = self.wk(y).reshape(B, Ny, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
117
- # BNyC -> BNyH(C/H) -> BHNy(C/H)
118
- v = self.wv(y).reshape(B, Ny, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
119
-
120
- attn = (q @ k.transpose(-2, -1)) * self.scale # BHNx(C/H) @ BH(C/H)Ny -> BHNxNy
121
- attn = attn.softmax(dim=-1)
122
- attn = self.attn_drop(attn)
123
-
124
- x = (attn @ v).transpose(1, 2).reshape(B, Nx, C) # (BHNxNy @ BHNy(C/H)) -> BHNx(C/H) -> BNxH(C/H) -> BNxC
125
- x = self.proj(x)
126
- x = self.proj_drop(x)
127
- return x
128
-
129
- class CrossAttentionBlock(nn.Module):
130
-
131
- def __init__(
132
- self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
133
- drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
134
- super().__init__()
135
-
136
- self.norm0 = norm_layer(dim)
137
- self.selfattn = Attention(
138
- dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
139
- self.drop_path0 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
140
-
141
- self.norm1 = norm_layer(dim)
142
- self.attn = CrossAttention(
143
- dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
144
- # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
145
- self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
146
-
147
- self.norm2 = norm_layer(dim)
148
- self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
149
- self.drop_path2 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
150
-
151
- def forward(self, x, y):
152
- x = x + self.drop_path0(self.selfattn(self.norm0(x)))
153
- x = x + self.drop_path1(self.attn(self.norm1(x), y))
154
- x = x + self.drop_path2(self.mlp(self.norm2(x)))
155
- return x
 
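A quick shape sanity-check for the cross-attention block defined above, assuming models_crossvit.py is importable from the working directory: query tokens x of shape [B, Nx, C] attend to exemplar tokens y of shape [B, Ny, C] and return with their shape unchanged.

```python
# Shape check for CrossAttentionBlock (assumes models_crossvit.py is on the Python path).
import torch
from models_crossvit import CrossAttentionBlock

block = CrossAttentionBlock(dim=512, num_heads=16, mlp_ratio=4., qkv_bias=True)
x = torch.randn(2, 576, 512)  # image tokens: 24x24 patches for a 384x384 image with patch size 16
y = torch.randn(2, 3, 512)    # three exemplar tokens
print(block(x, y).shape)      # torch.Size([2, 576, 512])
```
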
VA-Count-main/VA-Count-main/models_mae_cross.py DELETED
@@ -1,253 +0,0 @@
1
- import time
2
- from functools import partial
3
- import math
4
- import random
5
-
6
- import numpy as np
7
-
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- import torchvision.utils
12
-
13
- from timm.models.vision_transformer import PatchEmbed, Block
14
- from models_crossvit import CrossAttentionBlock
15
-
16
- from util.pos_embed import get_2d_sincos_pos_embed
17
-
18
- class SupervisedMAE(nn.Module):
19
- def __init__(self, img_size=384, patch_size=16, in_chans=3,
20
- embed_dim=1024, depth=24, num_heads=16,
21
- decoder_embed_dim=512, decoder_depth=2, decoder_num_heads=16,
22
- mlp_ratio=4., norm_layer=nn.LayerNorm, norm_pix_loss=False):
23
- super().__init__()
24
-
25
- # --------------------------------------------------------------------------
26
- # MAE encoder specifics
27
- self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
28
- num_patches = self.patch_embed.num_patches
29
-
30
- self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim), requires_grad=False) # fixed sin-cos embedding
31
-
32
- self.blocks = nn.ModuleList([
33
- Block(embed_dim, num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
34
- for i in range(depth)])
35
- self.norm = norm_layer(embed_dim)
36
- # --------------------------------------------------------------------------
37
-
38
- # --------------------------------------------------------------------------
39
- # MAE decoder specifics
40
- self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
41
-
42
- self.decoder_pos_embed = nn.Parameter(torch.zeros(1, num_patches, decoder_embed_dim), requires_grad=False) # fixed sin-cos embedding
43
-
44
- self.shot_token = nn.Parameter(torch.zeros(512))
45
-
46
- # Exemplar encoder with CNN
47
- self.decoder_proj1 = nn.Sequential(
48
- nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
49
- nn.InstanceNorm2d(64),
50
- nn.ReLU(inplace=True),
51
- nn.MaxPool2d(2) #[3,64,64]->[64,32,32]
52
- )
53
- self.decoder_proj2 = nn.Sequential(
54
- nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
55
- nn.InstanceNorm2d(128),
56
- nn.ReLU(inplace=True),
57
- nn.MaxPool2d(2) #[64,32,32]->[128,16,16]
58
- )
59
- self.decoder_proj3 = nn.Sequential(
60
- nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
61
- nn.InstanceNorm2d(256),
62
- nn.ReLU(inplace=True),
63
- nn.MaxPool2d(2) # [128,16,16]->[256,8,8]
64
- )
65
- self.decoder_proj4 = nn.Sequential(
66
- nn.Conv2d(256, decoder_embed_dim, kernel_size=3, stride=1, padding=1),
67
- nn.InstanceNorm2d(512),
68
- nn.ReLU(inplace=True),
69
- nn.AdaptiveAvgPool2d((1,1))
70
- # [256,8,8]->[512,1,1]
71
- )
72
-
73
-
74
- self.decoder_blocks = nn.ModuleList([
75
- CrossAttentionBlock(decoder_embed_dim, decoder_num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
76
- for i in range(decoder_depth)])
77
-
78
- self.decoder_norm = norm_layer(decoder_embed_dim)
79
- # Density map regression module
80
- self.decode_head0 = nn.Sequential(
81
- nn.Conv2d(decoder_embed_dim, 256, kernel_size=3, stride=1, padding=1),
82
- nn.GroupNorm(8, 256),
83
- nn.ReLU(inplace=True)
84
- )
85
- self.decode_head1 = nn.Sequential(
86
- nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
87
- nn.GroupNorm(8, 256),
88
- nn.ReLU(inplace=True)
89
- )
90
- self.decode_head2 = nn.Sequential(
91
- nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
92
- nn.GroupNorm(8, 256),
93
- nn.ReLU(inplace=True)
94
- )
95
- self.decode_head3 = nn.Sequential(
96
- nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
97
- nn.GroupNorm(8, 256),
98
- nn.ReLU(inplace=True),
99
- nn.Conv2d(256, 1, kernel_size=1, stride=1)
100
- )
101
-
102
- # --------------------------------------------------------------------------
103
-
104
- self.norm_pix_loss = norm_pix_loss
105
-
106
- self.initialize_weights()
107
-
108
- def initialize_weights(self):
109
- # initialization
110
- # initialize (and freeze) pos_embed by sin-cos embedding
111
- pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=False)
112
- self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))
113
-
114
- decoder_pos_embed = get_2d_sincos_pos_embed(self.decoder_pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=False)
115
- self.decoder_pos_embed.data.copy_(torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))
116
-
117
- # initialize patch_embed like nn.Linear (instead of nn.Conv2d)
118
- w = self.patch_embed.proj.weight.data
119
- torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1]))
120
-
121
- torch.nn.init.normal_(self.shot_token, std=.02)
122
-
123
- # initialize nn.Linear and nn.LayerNorm
124
- self.apply(self._init_weights)
125
-
126
- def _init_weights(self, m):
127
- if isinstance(m, nn.Linear):
128
- # we use xavier_uniform following official JAX ViT:
129
- torch.nn.init.xavier_uniform_(m.weight)
130
- if isinstance(m, nn.Linear) and m.bias is not None:
131
- nn.init.constant_(m.bias, 0)
132
- elif isinstance(m, nn.LayerNorm):
133
- nn.init.constant_(m.bias, 0)
134
- nn.init.constant_(m.weight, 1.0)
135
-
136
- def forward_encoder(self, x):
137
- # embed patches
138
- x = self.patch_embed(x)
139
-
140
- # add pos embed w/o cls token
141
- x = x + self.pos_embed
142
-
143
- # apply Transformer blocks
144
- for blk in self.blocks:
145
- x = blk(x)
146
- x = self.norm(x)
147
-
148
- return x
149
-
150
- def forward_decoder(self, x, y_, shot_num=3):
151
- # embed tokens
152
- x = self.decoder_embed(x)
153
- # add pos embed
154
- x = x + self.decoder_pos_embed
155
-
156
- # Exemplar encoder
157
- y_ = y_.transpose(0,1) # y_ [N,3,3,64,64]->[3,N,3,64,64]
158
- y1=[]
159
- C=0
160
- N=0
161
- cnt = 0
162
- for yi in y_:
163
- cnt+=1
164
- if cnt > shot_num:
165
- break
166
- yi = self.decoder_proj1(yi)
167
- yi = self.decoder_proj2(yi)
168
- yi = self.decoder_proj3(yi)
169
- yi = self.decoder_proj4(yi)
170
- N, C,_,_ = yi.shape
171
- y1.append(yi.squeeze(-1).squeeze(-1)) # yi [N,C,1,1]->[N,C]
172
-
173
- if shot_num > 0:
174
- y = torch.cat(y1,dim=0).reshape(shot_num,N,C).to(x.device)
175
- else:
176
- y = self.shot_token.repeat(y_.shape[1],1).unsqueeze(0).to(x.device)
177
- y = y.transpose(0,1) # y [3,N,C]->[N,3,C]
178
-
179
- # apply Transformer blocks
180
- for blk in self.decoder_blocks:
181
- x = blk(x, y)
182
- x = self.decoder_norm(x)
183
-
184
- # Density map regression
185
- n, hw, c = x.shape
186
- h = w = int(math.sqrt(hw))
187
- x = x.transpose(1, 2).reshape(n, c, h, w)
188
-
189
- x = F.interpolate(
190
- self.decode_head0(x), size=x.shape[-1]*2, mode='bilinear', align_corners=False)
191
- x = F.interpolate(
192
- self.decode_head1(x), size=x.shape[-1]*2, mode='bilinear', align_corners=False)
193
- x = F.interpolate(
194
- self.decode_head2(x), size=x.shape[-1]*2, mode='bilinear', align_corners=False)
195
- x = F.interpolate(
196
- self.decode_head3(x), size=x.shape[-1]*2, mode='bilinear', align_corners=False)
197
- x = x.squeeze(-3)
198
-
199
- return x
200
-
201
- def forward(self, imgs, boxes, shot_num):
202
- # if boxes.nelement() > 0:
203
- # torchvision.utils.save_image(boxes[0], f"data/out/crops/box_{time.time()}_{random.randint(0, 99999):>5}.png")
204
- with torch.no_grad():
205
- latent = self.forward_encoder(imgs)
206
- pred = self.forward_decoder(latent, boxes, shot_num) # [N, 384, 384]
207
- return pred
208
-
209
-
210
- def mae_vit_base_patch16_dec512d8b(**kwargs):
211
- model = SupervisedMAE(
212
- patch_size=16, embed_dim=768, depth=12, num_heads=12,
213
- decoder_embed_dim=512, decoder_depth=2, decoder_num_heads=16,
214
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
215
- return model
216
-
217
-
218
- def mae_vit_large_patch16_dec512d8b(**kwargs):
219
- model = SupervisedMAE(
220
- patch_size=16, embed_dim=1024, depth=24, num_heads=16,
221
- decoder_embed_dim=512, decoder_depth=2, decoder_num_heads=16,
222
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
223
- return model
224
-
225
-
226
- def mae_vit_huge_patch14_dec512d8b(**kwargs):
227
- model = SupervisedMAE(
228
- patch_size=14, embed_dim=1280, depth=32, num_heads=16,
229
- decoder_embed_dim=512, decoder_depth=2, decoder_num_heads=16,
230
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
231
- return model
232
-
233
- def mae_vit_base_patch16_fim4(**kwargs):
234
- model = SupervisedMAE(
235
- patch_size=16, embed_dim=768, depth=12, num_heads=12,
236
- decoder_embed_dim=512, decoder_depth=4, decoder_num_heads=16,
237
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
238
- return model
239
-
240
- def mae_vit_base_patch16_fim6(**kwargs):
241
- model = SupervisedMAE(
242
- patch_size=16, embed_dim=768, depth=12, num_heads=12,
243
- decoder_embed_dim=512, decoder_depth=6, decoder_num_heads=16,
244
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
245
- return model
246
-
247
-
248
- # set recommended archs
249
- mae_vit_base_patch16 = mae_vit_base_patch16_dec512d8b
250
- mae_vit_base4_patch16 = mae_vit_base_patch16_fim4 # decoder: 4 blocks
251
- mae_vit_base6_patch16 = mae_vit_base_patch16_fim6 # decoder: 6 blocks
252
- mae_vit_large_patch16 = mae_vit_large_patch16_dec512d8b
253
- mae_vit_huge_patch14 = mae_vit_huge_patch14_dec512d8b
 
 
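For orientation, a small sketch of the tensor shapes the counting model above expects, assuming the repository modules (models_crossvit, util.pos_embed) and the timm version pinned in requirements.txt are available: a 384x384 image, three 64x64 exemplar crops, and a per-pixel density map as output. Weights here are random; this only checks the plumbing.

```python
# Shape sketch for SupervisedMAE (random weights; assumes the repo modules are importable).
import torch
import models_mae_cross

model = models_mae_cross.mae_vit_base_patch16()
imgs = torch.randn(1, 3, 384, 384)    # input image
boxes = torch.randn(1, 3, 3, 64, 64)  # three exemplar crops resized to 64x64
with torch.no_grad():
    density = model(imgs, boxes, shot_num=3)
print(density.shape)                  # torch.Size([1, 384, 384])
```
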
VA-Count-main/VA-Count-main/models_mae_noct.py DELETED
@@ -1,234 +0,0 @@
1
- from functools import partial
2
-
3
- import torch
4
- import torch.nn as nn
5
-
6
- from timm.models.vision_transformer import PatchEmbed, Block
7
-
8
- from util.pos_embed import get_2d_sincos_pos_embed
9
-
10
-
11
- class MaskedAutoencoderViTNoCT(nn.Module):
12
- """ Masked Autoencoder with VisionTransformer backbone
13
- """
14
- def __init__(self, img_size=384, patch_size=16, in_chans=3,
15
- embed_dim=1024, depth=24, num_heads=16,
16
- decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
17
- mlp_ratio=4., norm_layer=nn.LayerNorm, norm_pix_loss=False):
18
- super().__init__()
19
-
20
- # --------------------------------------------------------------------------
21
- # MAE encoder specifics
22
- self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
23
- num_patches = self.patch_embed.num_patches
24
-
25
- self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim), requires_grad=False) # fixed sin-cos embedding
26
-
27
- self.blocks = nn.ModuleList([
28
- Block(embed_dim, num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
29
- for i in range(depth)])
30
- self.norm = norm_layer(embed_dim)
31
- # --------------------------------------------------------------------------
32
-
33
- # --------------------------------------------------------------------------
34
- # MAE decoder specifics
35
- self.decoder_embed = nn.Linear(embed_dim, decoder_embed_dim, bias=True)
36
-
37
- self.mask_token = nn.Parameter(torch.zeros(1, 1, decoder_embed_dim))
38
-
39
- self.decoder_pos_embed = nn.Parameter(torch.zeros(1, num_patches, decoder_embed_dim), requires_grad=False) # fixed sin-cos embedding
40
-
41
- self.decoder_blocks = nn.ModuleList([
42
- Block(decoder_embed_dim, decoder_num_heads, mlp_ratio, qkv_bias=True, qk_scale=None, norm_layer=norm_layer)
43
- for i in range(decoder_depth)])
44
-
45
- self.decoder_norm = norm_layer(decoder_embed_dim)
46
- self.decoder_pred = nn.Linear(decoder_embed_dim, patch_size**2 * in_chans, bias=True) # decoder to patch
47
- # --------------------------------------------------------------------------
48
-
49
- self.norm_pix_loss = norm_pix_loss
50
-
51
- self.initialize_weights()
52
-
53
- def initialize_weights(self):
54
- # initialization
55
- # initialize (and freeze) pos_embed by sin-cos embedding
56
- pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=False)
57
- self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))
58
-
59
- decoder_pos_embed = get_2d_sincos_pos_embed(self.decoder_pos_embed.shape[-1], int(self.patch_embed.num_patches**.5), cls_token=False)
60
- self.decoder_pos_embed.data.copy_(torch.from_numpy(decoder_pos_embed).float().unsqueeze(0))
61
-
62
- # initialize patch_embed like nn.Linear (instead of nn.Conv2d)
63
- w = self.patch_embed.proj.weight.data
64
- torch.nn.init.xavier_uniform_(w.view([w.shape[0], -1]))
65
-
66
- # timm's trunc_normal_(std=.02) is effectively normal_(std=0.02) as cutoff is too big (2.)
67
- torch.nn.init.normal_(self.mask_token, std=.02)
68
-
69
- # initialize nn.Linear and nn.LayerNorm
70
- self.apply(self._init_weights)
71
-
72
- def _init_weights(self, m):
73
- if isinstance(m, nn.Linear):
74
- # we use xavier_uniform following official JAX ViT:
75
- torch.nn.init.xavier_uniform_(m.weight)
76
- if isinstance(m, nn.Linear) and m.bias is not None:
77
- nn.init.constant_(m.bias, 0)
78
- elif isinstance(m, nn.LayerNorm):
79
- nn.init.constant_(m.bias, 0)
80
- nn.init.constant_(m.weight, 1.0)
81
-
82
- def patchify(self, imgs):
83
- """
84
- imgs: (N, 3, H, W)
85
- x: (N, L, patch_size**2 *3)
86
- """
87
- p = self.patch_embed.patch_size[0]
88
- assert imgs.shape[2] == imgs.shape[3] and imgs.shape[2] % p == 0
89
-
90
- h = w = imgs.shape[2] // p
91
- x = imgs.reshape(shape=(imgs.shape[0], 3, h, p, w, p))
92
- x = torch.einsum('nchpwq->nhwpqc', x)
93
- x = x.reshape(shape=(imgs.shape[0], h * w, p**2 * 3))
94
- return x
95
-
96
- def unpatchify(self, x):
97
- """
98
- x: (N, L, patch_size**2 *3)
99
- imgs: (N, 3, H, W)
100
- """
101
- p = self.patch_embed.patch_size[0]
102
- h = w = int(x.shape[1]**.5)
103
- assert h * w == x.shape[1]
104
-
105
- x = x.reshape(shape=(x.shape[0], h, w, p, p, 3))
106
- x = torch.einsum('nhwpqc->nchpwq', x)
107
- imgs = x.reshape(shape=(x.shape[0], 3, h * p, h * p))
108
- return imgs
109
-
110
- def random_masking(self, x, mask_ratio):
111
- """
112
- Perform per-sample random masking by per-sample shuffling.
113
- Per-sample shuffling is done by argsort random noise.
114
- x: [N, L, D], sequence
115
- """
116
- N, L, D = x.shape # batch, length, dim
117
- len_keep = int(L * (1 - mask_ratio))
118
-
119
- noise = torch.rand(N, L, device=x.device) # noise in [0, 1]
120
-
121
- # sort noise for each sample
122
- ids_shuffle = torch.argsort(noise, dim=1) # ascend: small is keep, large is remove
123
- ids_restore = torch.argsort(ids_shuffle, dim=1)
124
-
125
- # keep the first subset
126
- ids_keep = ids_shuffle[:, :len_keep]
127
- x_masked = torch.gather(x, dim=1, index=ids_keep.unsqueeze(-1).repeat(1, 1, D))
128
-
129
- # generate the binary mask: 0 is keep, 1 is remove
130
- mask = torch.ones([N, L], device=x.device)
131
- mask[:, :len_keep] = 0
132
- # unshuffle to get the binary mask
133
- mask = torch.gather(mask, dim=1, index=ids_restore)
134
-
135
- return x_masked, mask, ids_restore
136
-
137
- def forward_encoder(self, x, mask_ratio):
138
- # embed patches
139
- x = self.patch_embed(x)
140
-
141
- # add pos embed w/o cls token
142
- x = x + self.pos_embed
143
-
144
- # masking: length -> length * mask_ratio
145
- x, mask, ids_restore = self.random_masking(x, mask_ratio)
146
-
147
- # apply Transformer blocks
148
- for blk in self.blocks:
149
- x = blk(x)
150
- x = self.norm(x)
151
-
152
- return x, mask, ids_restore
153
-
154
- def forward_decoder(self, x, ids_restore):
155
- # embed tokens
156
- x = self.decoder_embed(x)
157
-
158
- # append mask tokens to sequence
159
- mask_tokens = self.mask_token.repeat(x.shape[0], ids_restore.shape[1] - x.shape[1], 1)
160
- x_ = torch.cat([x, mask_tokens], dim=1) # no cls token
161
- x_ = torch.gather(x_, dim=1, index=ids_restore.unsqueeze(-1).repeat(1, 1, x.shape[2])) # unshuffle
162
- x = x_ # append cls token
163
-
164
- # add pos embed
165
- x = x + self.decoder_pos_embed
166
-
167
- # apply Transformer blocks
168
- for blk in self.decoder_blocks:
169
- x = blk(x)
170
- x = self.decoder_norm(x)
171
-
172
- # predictor projection
173
- x = self.decoder_pred(x)
174
-
175
- return x
176
-
177
- def forward_loss(self, imgs, pred, mask):
178
- """
179
- imgs: [N, 3, H, W]
180
- pred: [N, L, p*p*3]
181
- mask: [N, L], 0 is keep, 1 is remove,
182
- """
183
- target = self.patchify(imgs)
184
- if self.norm_pix_loss:
185
- mean = target.mean(dim=-1, keepdim=True)
186
- var = target.var(dim=-1, keepdim=True)
187
- target = (target - mean) / (var + 1.e-6)**.5
188
-
189
- loss = (pred - target) ** 2
190
- loss = loss.mean(dim=-1) # [N, L], mean loss per patch
191
-
192
- # For mean loss on all patches
193
- N, L = mask.shape
194
- mask_s = torch.ones([N, L], device=imgs.device)
195
- loss = (loss * mask_s).sum() / mask_s.sum()
196
-
197
- #loss = (loss * mask).sum() / mask.sum() # mean loss on removed patches
198
- return loss
199
-
200
- def forward(self, imgs, mask_ratio=0.75):
201
- latent, mask, ids_restore = self.forward_encoder(imgs, mask_ratio)
202
- pred = self.forward_decoder(latent, ids_restore) # [N, L, p*p*3]
203
- loss = self.forward_loss(imgs, pred, mask)
204
- return loss, pred, mask
205
-
206
-
207
- def mae_vit_base_patch16_dec512d8b(**kwargs):
208
- model = MaskedAutoencoderViTNoCT(
209
- patch_size=16, embed_dim=768, depth=12, num_heads=12,
210
- decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
211
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
212
- return model
213
-
214
-
215
- def mae_vit_large_patch16_dec512d8b(**kwargs):
216
- model = MaskedAutoencoderViTNoCT(
217
- patch_size=16, embed_dim=1024, depth=24, num_heads=16,
218
- decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
219
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
220
- return model
221
-
222
-
223
- def mae_vit_huge_patch14_dec512d8b(**kwargs):
224
- model = MaskedAutoencoderViTNoCT(
225
- patch_size=14, embed_dim=1280, depth=32, num_heads=16,
226
- decoder_embed_dim=512, decoder_depth=8, decoder_num_heads=16,
227
- mlp_ratio=4, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
228
- return model
229
-
230
-
231
- # set recommended archs
232
- mae_vit_base_patch16 = mae_vit_base_patch16_dec512d8b # decoder: 512 dim, 8 blocks
233
- mae_vit_large_patch16 = mae_vit_large_patch16_dec512d8b # decoder: 512 dim, 8 blocks
234
- mae_vit_huge_patch14 = mae_vit_huge_patch14_dec512d8b # decoder: 512 dim, 8 blocks
 
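The masking arithmetic above is worth spelling out: with 384x384 inputs and 16x16 patches the encoder sees 576 tokens, and random_masking keeps int(L * (1 - mask_ratio)) of them. A small sketch of the numbers for the two ratios that appear in this repository (0.5 in the pre-training arguments, 0.75 as the model's forward default):

```python
# How many patch tokens survive random_masking for a 384x384 input.
img_size, patch_size = 384, 16
num_patches = (img_size // patch_size) ** 2         # 24 * 24 = 576
for mask_ratio in (0.5, 0.75):
    len_keep = int(num_patches * (1 - mask_ratio))  # tokens kept by the encoder
    print(mask_ratio, len_keep)                     # 0.5 -> 288, 0.75 -> 144
```
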
VA-Count-main/VA-Count-main/requirements.txt DELETED
@@ -1,15 +0,0 @@
1
- --extra-index-url https://download.pytorch.org/whl/cu116
2
-
3
- torch==1.13.1+cu116
4
- torchvision==0.14.1+cu116
5
- timm==0.4.9
6
- numpy==1.23.4
7
- scipy==1.10.1
8
- imgaug==0.4.0
9
- pillow==9.3.0
10
- matplotlib==3.6.3
11
- hub==3.0.1
12
- pandas==1.5.2
13
- six==1.16.0
14
- wandb
15
- tqdm
 
VA-Count-main/VA-Count-main/util/FSC147.py DELETED
@@ -1,524 +0,0 @@
1
- from argparse import Namespace
2
- import json
3
- from pathlib import Path
4
-
5
- import numpy as np
6
- import random
7
- from torchvision import transforms
8
- import torch
9
- import cv2
10
- import torchvision.transforms.functional as TF
11
- import scipy.ndimage as ndimage
12
- from PIL import Image
13
- import argparse
14
- import imgaug.augmenters as iaa
15
- from imgaug.augmentables import Keypoint, KeypointsOnImage
16
-
17
- MAX_HW = 384
18
- IM_NORM_MEAN = [0.485, 0.456, 0.406]
19
- IM_NORM_STD = [0.229, 0.224, 0.225]
20
-
21
- def get_args_parser():
22
- parser = argparse.ArgumentParser('MAE pre-training', add_help=False)
23
- parser.add_argument('--batch_size', default=8, type=int,
24
- help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus')
25
- parser.add_argument('--epochs', default=200, type=int)
26
- parser.add_argument('--accum_iter', default=1, type=int,
27
- help='Accumulate gradient iterations (for increasing the effective batch size under memory constraints)')
28
-
29
- # Model parameters
30
- parser.add_argument('--model', default='mae_vit_base_patch16', type=str, metavar='MODEL',
31
- help='Name of model to train')
32
-
33
- parser.add_argument('--mask_ratio', default=0.5, type=float,
34
- help='Masking ratio (percentage of removed patches).')
35
-
36
- parser.add_argument('--norm_pix_loss', action='store_true',
37
- help='Use (per-patch) normalized pixels as targets for computing loss')
38
- parser.set_defaults(norm_pix_loss=False)
39
-
40
- # Optimizer parameters
41
- parser.add_argument('--weight_decay', type=float, default=0.05,
42
- help='weight decay (default: 0.05)')
43
- parser.add_argument('--lr', type=float, default=None, metavar='LR',
44
- help='learning rate (absolute lr)')
45
- parser.add_argument('--blr', type=float, default=1e-3, metavar='LR',
46
- help='base learning rate: absolute_lr = base_lr * total_batch_size / 256')
47
- parser.add_argument('--min_lr', type=float, default=0., metavar='LR',
48
- help='lower lr bound for cyclic schedulers that hit 0')
49
- parser.add_argument('--warmup_epochs', type=int, default=10, metavar='N',
50
- help='epochs to warmup LR')
51
-
52
- # Dataset parameters
53
- parser.add_argument('--data_path', default='./data/FSC147/', type=str,
54
- help='dataset path')
55
- parser.add_argument('--anno_file', default='annotation_FSC147_384.json', type=str,
56
- help='annotation json file')
57
- parser.add_argument('--data_split_file', default='Train_Test_Val_FSC_147.json', type=str,
58
- help='data split json file')
59
- parser.add_argument('--im_dir', default='images_384_VarV2', type=str,
60
- help='images directory')
61
- parser.add_argument('--gt_dir', default='./data/FSC147/gt_density_map_adaptive_384_VarV2', type=str,
62
- help='ground truth directory')
63
- parser.add_argument('--output_dir', default='./data/out/pre_4_dir',
64
- help='path where to save, empty for no saving')
65
- parser.add_argument('--device', default='cuda',
66
- help='device to use for training / testing')
67
- parser.add_argument('--seed', default=0, type=int)
68
- parser.add_argument('--resume', default='./weights/mae_pretrain_vit_base_full.pth', # mae_visualize_vit_base
69
- help='resume from checkpoint')
70
-
71
- # Training parameters
72
- parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
73
- help='start epoch')
74
- parser.add_argument('--num_workers', default=10, type=int)
75
- parser.add_argument('--pin_mem', action='store_true',
76
- help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
77
- parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
78
- parser.set_defaults(pin_mem=True)
79
-
80
- # Distributed training parameters
81
- parser.add_argument('--world_size', default=1, type=int,
82
- help='number of distributed processes')
83
- parser.add_argument('--local_rank', default=-1, type=int)
84
- parser.add_argument('--dist_on_itp', action='store_true')
85
- parser.add_argument('--dist_url', default='env://',
86
- help='url used to set up distributed training')
87
-
88
- # Logging parameters
89
- parser.add_argument('--log_dir', default='./logs/pre_4_dir',
90
- help='path where to tensorboard log')
91
- parser.add_argument("--title", default="CounTR_pretraining", type=str)
92
- parser.add_argument("--wandb", default="counting", type=str)
93
- parser.add_argument("--team", default="wsense", type=str)
94
- parser.add_argument("--wandb_id", default=None, type=str)
95
- parser.add_argument("--do_aug", default=True, type=bool)
96
- parser.add_argument('--class_file', default='./data/FSC147/ImageClasses_FSC147.txt', type=str,
97
- help='class json file')
98
- return parser
99
-
100
- args = get_args_parser()
101
- args = args.parse_args()
102
-
103
- class ResizeSomeImage(object):
104
- def __init__(self, args):
105
- args = get_args_parser()
106
- args = args.parse_args()
107
- # print(dir(args.im_dir.as_posix()))
108
- self.data_path = Path(args.data_path)
109
- self.im_dir = self.data_path/args.im_dir
110
- anno_file = self.data_path/args.anno_file
111
- data_split_file = self.data_path/args.data_split_file
112
-
113
- with open(anno_file) as f:
114
- self.annotations = json.load(f)
115
-
116
- with open(data_split_file) as f:
117
- data_split = json.load(f)
118
-
119
- self.train_set = data_split['train']
120
-
121
- self.class_dict = {}
122
- if args.do_aug:
123
- with open(args.class_file) as f:
124
- for line in f:
125
- key = line.split()[0]
126
- val = line.split()[1:]
127
- self.class_dict[key] = val
128
-
129
-
130
- class ResizePreTrainImage(ResizeSomeImage):
131
- """
132
- Resize the image so that:
133
- 1. Image is equal to 384 * 384
134
- 2. The new height and new width are divisible by 16
135
- 3. The aspect ratio is preserved
136
- Density and box correctness are not preserved (crop and horizontal flip)
137
- """
138
-
139
- def __init__(self, args, MAX_HW=384):
140
- super().__init__(args)
141
- self.max_hw = MAX_HW
142
-
143
- def __call__(self, sample):
144
- image, lines_boxes, density = sample['image'], sample['lines_boxes'], sample['gt_density']
145
-
146
- W, H = image.size
147
-
148
- new_H = 16 * int(H / 16)
149
- new_W = 16 * int(W / 16)
150
- resized_image = transforms.Resize((new_H, new_W))(image)
151
- resized_density = cv2.resize(density, (new_W, new_H))
152
- orig_count = np.sum(density)
153
- new_count = np.sum(resized_density)
154
-
155
- if new_count > 0:
156
- resized_density = resized_density * (orig_count / new_count)
157
-
158
- boxes = list()
159
- for box in lines_boxes:
160
- box2 = [int(k) for k in box]
161
- y1, x1, y2, x2 = box2[0], box2[1], box2[2], box2[3]
162
- boxes.append([0, y1, x1, y2, x2])
163
-
164
- boxes = torch.Tensor(boxes).unsqueeze(0)
165
- resized_image = PreTrainNormalize(resized_image)
166
- resized_density = torch.from_numpy(resized_density).unsqueeze(0).unsqueeze(0)
167
- sample = {'image': resized_image, 'boxes': boxes, 'gt_density': resized_density}
168
- return sample
169
-
170
-
171
- class ResizeTrainImage(ResizeSomeImage):
172
- """
173
- Resize the image so that:
174
- 1. Image is equal to 384 * 384
175
- 2. The new height and new width are divisible by 16
176
- 3. The aspect ratio is possibly preserved
177
- Density map is cropped to have the same size (and position) as the cropped image
178
- Exemplar boxes may be outside the cropped area.
179
- Augmentations used include Gaussian noise, Color jitter, Gaussian blur, Random affine, Random horizontal flip, and Mosaic (or Random Crop when Mosaic is not applied).
180
- """
181
-
182
- def __init__(self, args, MAX_HW=384, do_aug=True):
183
- super().__init__(args)
184
- self.max_hw = MAX_HW
185
- self.do_aug = do_aug
186
-
187
- def __call__(self, sample):
188
- image, lines_boxes, neg_lines_boxes, dots, im_id, m_flag = sample['image'], sample['lines_boxes'], sample['neg_lines_boxes'], \
189
- sample['dots'], sample['id'], sample['m_flag']
190
-
191
- W, H = image.size
192
-
193
- new_H = 16 * int(H / 16)
194
- new_W = 16 * int(W / 16)
195
- scale_factor_h = float(new_H) / H
196
- scale_factor_w = float(new_W) / W
197
- resized_image = transforms.Resize((new_H, new_W))(image)
198
- resized_image = TTensor(resized_image)
199
- resized_density = np.zeros((new_H, new_W), dtype='float32')
200
-
201
- # Augmentation probability
202
- aug_flag = self.do_aug
203
- mosaic_flag = random.random() < 0.25
204
-
205
- if aug_flag:
206
- # Gaussian noise
207
- noise = np.random.normal(0, 0.1, resized_image.size())
208
- noise = torch.from_numpy(noise)
209
- re_image = resized_image + noise
210
- re_image = torch.clamp(re_image, 0, 1)
211
-
212
- # Color jitter and Gaussian blur
213
- re_image = Augmentation(re_image)
214
-
215
- # Random affine
216
- re1_image = re_image.transpose(0, 1).transpose(1, 2).numpy()
217
- keypoints = []
218
- for i in range(dots.shape[0]):
219
- keypoints.append(Keypoint(x=min(new_W - 1, int(dots[i][0] * scale_factor_w)), y=min(new_H - 1, int(dots[i][1] * scale_factor_h))))
220
- kps = KeypointsOnImage(keypoints, re1_image.shape)
221
-
222
- seq = iaa.Sequential([
223
- iaa.Affine(
224
- rotate=(-15, 15),
225
- scale=(0.8, 1.2),
226
- shear=(-10, 10),
227
- translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}
228
- )
229
- ])
230
- re1_image, kps_aug = seq(image=re1_image, keypoints=kps)
231
-
232
- # Produce dot annotation map
233
- resized_density = np.zeros((resized_density.shape[0], resized_density.shape[1]), dtype='float32')
234
- for i in range(len(kps.keypoints)):
235
- if (int(kps_aug.keypoints[i].y) <= new_H - 1 and int(kps_aug.keypoints[i].x) <= new_W - 1) and not \
236
- kps_aug.keypoints[i].is_out_of_image(re1_image):
237
- resized_density[int(kps_aug.keypoints[i].y)][int(kps_aug.keypoints[i].x)] = 1
238
- resized_density = torch.from_numpy(resized_density)
239
-
240
- re_image = TTensor(re1_image)
241
-
242
- # Random horizontal flip
243
- flip_p = random.random()
244
- if flip_p > 0.5:
245
- re_image = TF.hflip(re_image)
246
- resized_density = TF.hflip(resized_density)
247
-
248
- # Random self mosaic
249
- if mosaic_flag:
250
- image_array = []
251
- map_array = []
252
- blending_l = random.randint(10, 20)
253
- resize_l = 192 + 2 * blending_l
254
- if dots.shape[0] >= 70:
255
- for i in range(4):
256
- length = random.randint(150, 384)
257
- start_W = random.randint(0, new_W - length)
258
- start_H = random.randint(0, new_H - length)
259
- reresized_image1 = TF.crop(resized_image, start_H, start_W, length, length)
260
- reresized_image1 = transforms.Resize((resize_l, resize_l))(reresized_image1)
261
- reresized_density1 = np.zeros((resize_l, resize_l), dtype='float32')
262
- for i in range(dots.shape[0]):
263
- if start_H <= min(new_H - 1, int(dots[i][1] * scale_factor_h)) < start_H + length and start_W <= min(new_W - 1, int(dots[i][0] * scale_factor_w)) < start_W + length:
264
- reresized_density1[min(resize_l-1,int((min(new_H-1,int(dots[i][1] * scale_factor_h))-start_H)*resize_l/length))][min(resize_l-1,int((min(new_W-1,int(dots[i][0] * scale_factor_w))-start_W)*resize_l/length))]=1
265
- reresized_density1 = torch.from_numpy(reresized_density1)
266
- image_array.append(reresized_image1)
267
- map_array.append(reresized_density1)
268
- else:
269
- m_flag = 1
270
- prob = random.random()
271
- if prob > 0.25:
272
- gt_pos = random.randint(0, 3)
273
- else:
274
- gt_pos = random.randint(0, 4) # 5% 0 objects
275
- for i in range(4):
276
- if i == gt_pos:
277
- Tim_id = im_id
278
- r_image = resized_image
279
- Tdots = dots
280
- new_TH = new_H
281
- new_TW = new_W
282
- Tscale_factor_w = scale_factor_w
283
- Tscale_factor_h = scale_factor_h
284
- else:
285
- Tim_id = self.train_set[random.randint(0, len(self.train_set) - 1)]
286
- Tdots = np.array(self.annotations[Tim_id]['points'])
287
- Timage = Image.open('{}/{}'.format(self.im_dir, Tim_id))
288
- Timage.load()
289
- new_TW = 16 * int(Timage.size[0] / 16)
290
- new_TH = 16 * int(Timage.size[1] / 16)
291
- Tscale_factor_w = float(new_TW) / Timage.size[0]
292
- Tscale_factor_h = float(new_TH) / Timage.size[1]
293
- r_image = TTensor(transforms.Resize((new_TH, new_TW))(Timage))
294
-
295
- length = random.randint(250, 384)
296
- start_W = random.randint(0, new_TW - length)
297
- start_H = random.randint(0, new_TH - length)
298
- r_image1 = TF.crop(r_image, start_H, start_W, length, length)
299
- r_image1 = transforms.Resize((resize_l, resize_l))(r_image1)
300
- r_density1 = np.zeros((resize_l, resize_l), dtype='float32')
301
- # try:
302
- # class_value = self.class_dict[im_id]
303
- # Tim_value = self.class_dict[Tim_id]
304
- # except KeyError:
305
- # # Handle the case when the key doesn't exist
306
- # class_value = None # Or any appropriate default value
307
- # Tim_value = None # Or any appropriate default value
308
- if self.class_dict[im_id] == self.class_dict[Tim_id]:
309
- # if class_value == Tim_value:
310
- # if im_id in self.class_dict and Tim_id in self.class_dict:
311
- # if im_id in self.class_dict and Tim_id in self.class_dict:
312
- # class_value = self.class_dict[im_id]
313
- # Tim_value = self.class_dict[Tim_id]
314
-
315
- # # Proceed with your comparison and processing here
316
- # if class_value == Tim_value:
317
- for i in range(Tdots.shape[0]):
318
- if start_H <= min(new_TH - 1, int(Tdots[i][1] * Tscale_factor_h)) < start_H + length and start_W <= min(new_TW - 1, int(Tdots[i][0] * Tscale_factor_w)) < start_W + length:
319
- r_density1[min(resize_l-1,int((min(new_TH-1, int(Tdots[i][1] * Tscale_factor_h))-start_H)*resize_l/length))][min(resize_l-1,int((min(new_TW-1,int(Tdots[i][0] * Tscale_factor_w))-start_W)*resize_l/length))]=1
320
- r_density1 = torch.from_numpy(r_density1)
321
- image_array.append(r_image1)
322
- map_array.append(r_density1)
323
-
324
- reresized_image5 = torch.cat((image_array[0][:, blending_l:resize_l-blending_l], image_array[1][:, blending_l: resize_l-blending_l]), 1)
325
- reresized_density5 = torch.cat((map_array[0][blending_l:resize_l-blending_l], map_array[1][blending_l: resize_l-blending_l]), 0)
326
- for i in range(blending_l):
327
- reresized_image5[:, 192+i] = image_array[0][:, resize_l-1-blending_l+i] * (blending_l-i)/(2 * blending_l) + reresized_image5[:, 192+i] * (i+blending_l)/(2*blending_l)
328
- reresized_image5[:, 191-i] = image_array[1][:, blending_l-i] * (blending_l-i)/(2*blending_l) + reresized_image5[:, 191-i] * (i+blending_l)/(2*blending_l)
329
- reresized_image5 = torch.clamp(reresized_image5, 0, 1)
330
-
331
- reresized_image6 = torch.cat((image_array[2][:, blending_l:resize_l-blending_l], image_array[3][:, blending_l: resize_l-blending_l]), 1)
332
- reresized_density6 = torch.cat((map_array[2][blending_l:resize_l-blending_l], map_array[3][blending_l:resize_l-blending_l]), 0)
333
- for i in range(blending_l):
334
- reresized_image6[:, 192+i] = image_array[2][:, resize_l-1-blending_l+i] * (blending_l-i)/(2*blending_l) + reresized_image6[:, 192+i] * (i+blending_l)/(2*blending_l)
335
- reresized_image6[:, 191-i] = image_array[3][:, blending_l-i] * (blending_l-i)/(2*blending_l) + reresized_image6[:, 191-i] * (i+blending_l)/(2*blending_l)
336
- reresized_image6 = torch.clamp(reresized_image6, 0, 1)
337
-
338
- reresized_image = torch.cat((reresized_image5[:, :, blending_l:resize_l-blending_l], reresized_image6[:, :, blending_l:resize_l-blending_l]), 2)
339
- reresized_density = torch.cat((reresized_density5[:, blending_l:resize_l-blending_l], reresized_density6[:, blending_l:resize_l-blending_l]), 1)
340
- for i in range(blending_l):
341
- reresized_image[:, :, 192+i] = reresized_image5[:, :, resize_l-1-blending_l+i] * (blending_l-i)/(2*blending_l) + reresized_image[:, :, 192+i] * (i+blending_l)/(2*blending_l)
342
- reresized_image[:, :, 191-i] = reresized_image6[:, :, blending_l-i] * (blending_l-i)/(2*blending_l) + reresized_image[:, :, 191-i] * (i+blending_l)/(2*blending_l)
343
- reresized_image = torch.clamp(reresized_image, 0, 1)
344
-
345
- else:
346
- # Random 384*384 crop in a new_W*384 image and 384*new_W density map
347
- start = random.randint(0, new_W - 1 - 383)
348
- reresized_image = TF.crop(re_image, 0, start, 384, 384)
349
- reresized_density = resized_density[:, start:start + 384]
350
-
351
- else:
352
- # Random 384*384 crop in a new_W*384 image and 384*new_W density map
353
- for i in range(dots.shape[0]):
354
- resized_density[min(new_H - 1, int(dots[i][1] * scale_factor_h))] \
355
- [min(new_W - 1, int(dots[i][0] * scale_factor_w))] = 1
356
- resized_density = torch.from_numpy(resized_density)
357
- start = random.randint(0, new_W - self.max_hw)
358
- reresized_image = TF.crop(resized_image, 0, start, self.max_hw, self.max_hw)
359
- reresized_density = resized_density[0:self.max_hw, start:start + self.max_hw]
360
-
361
- # Gaussian distribution density map
362
- reresized_density = ndimage.gaussian_filter(reresized_density.numpy(), sigma=(1, 1), order=0)
363
-
364
- # Density map scale up
365
- reresized_density = reresized_density * 60
366
- reresized_density = torch.from_numpy(reresized_density)
367
-
368
- # Crop bboxes and resize as 64x64
369
- boxes = list()
370
- rects = list()
371
- cnt = 0
372
- for box in lines_boxes:
373
- cnt += 1
374
- if cnt > 3:
375
- break
376
- box2 = [int(k) for k in box]
377
- y1 = int(box2[0] * scale_factor_h)
378
- x1 = int(box2[1] * scale_factor_w)
379
- y2 = int(box2[2] * scale_factor_h)
380
- x2 = int(box2[3] * scale_factor_w)
381
- # print(y1,x1,y2,x2)
382
- if not aug_flag:
383
- rects.append(torch.tensor([y1, max(0, x1-start), y2, min(self.max_hw, x2-start)]))
384
- bbox = resized_image[:, y1:y2 + 1, x1:x2 + 1]
385
- bbox = transforms.Resize((64, 64))(bbox)
386
- boxes.append(bbox)
387
- boxes = torch.stack(boxes)
388
- neg_boxes = list()
389
- neg_rects = list()
390
- cnt = 0
391
- for box in neg_lines_boxes:
392
- cnt += 1
393
- if cnt > 3:
394
- break
395
- box2 = [int(k) for k in box]
396
- y1 = int(box2[0] * scale_factor_h)
397
- x1 = int(box2[1] * scale_factor_w)
398
- y2 = int(box2[2] * scale_factor_h)
399
- x2 = int(box2[3] * scale_factor_w)
400
- # print(y1,x1,y2,x2)
401
- if not aug_flag:
402
- neg_rects.append(torch.tensor([y1, max(0, x1-start), y2, min(self.max_hw, x2-start)]))
403
- neg_bbox = resized_image[:, y1:y2 + 1, x1:x2 + 1]
404
- neg_bbox = transforms.Resize((64, 64))(neg_bbox)
405
- neg_boxes.append(neg_bbox)
406
- neg_boxes = torch.stack(neg_boxes)
407
- # if len(boxes) > 0:
408
- # boxes = torch.stack(boxes) # if boxes is non-empty, stack them as usual
409
- # boxes1 = boxes
410
- # else:
411
- # boxes = boxes1
412
- # pass
413
- # # if boxes is empty, you can either skip this sample or provide a default bounding box
414
- # # e.g., use a default bounding box that covers the whole image
415
- # default_box = torch.tensor([[0, 0],[0, 0],0, 0]) # an example default bounding box; the actual values depend on your application
416
- # boxes = default_box.unsqueeze(0) # add a dimension to match what torch.stack expects
417
- # # pass
418
- if aug_flag:
419
- pos = torch.tensor([])
420
- else:
421
- pos = torch.stack(rects)
422
-
423
- # boxes shape [3,3,64,64], image shape [3,384,384], density shape[384,384]
424
- sample = {'image': reresized_image, 'boxes': boxes, 'neg_boxes': neg_boxes, 'pos': pos, 'gt_density': reresized_density, 'm_flag': m_flag}
425
-
426
- return sample
427
-
428
-
429
- class ResizeValImage(ResizeSomeImage):
430
- def __init__(self, args, MAX_HW=384):
431
- super().__init__(args)
432
- self.max_hw = MAX_HW
433
-
434
- def __call__(self, sample):
435
- image, dots, m_flag, lines_boxes, neg_lines_boxes = sample['image'], sample['dots'], sample['m_flag'], sample['lines_boxes'], sample['neg_lines_boxes']
436
-
437
- W, H = image.size
438
-
439
- new_H = new_W = self.max_hw
440
- scale_factor_h = float(new_H) / H
441
- scale_factor_w = float(new_W) / W
442
- resized_image = transforms.Resize((new_H, new_W))(image)
443
- resized_image = TTensor(resized_image)
444
-
445
- # Resize density map
446
- resized_density = np.zeros((new_H, new_W), dtype='float32')
447
- for i in range(dots.shape[0]):
448
- resized_density[min(new_H - 1, int(dots[i][1] * scale_factor_h))] \
449
- [min(new_W - 1, int(dots[i][0] * scale_factor_w))] = 1
450
- # resized_density = ndimage.gaussian_filter(resized_density, sigma=4, radius=7, order=0)
451
- resized_density = ndimage.gaussian_filter(resized_density, sigma=4, order=0)
452
- resized_density = torch.from_numpy(resized_density) * 60
453
-
454
- # Crop bboxes and resize as 64x64
455
- boxes = list()
456
- rects = list()
457
- cnt = 0
458
- for box in lines_boxes:
459
- cnt += 1
460
- if cnt > 3:
461
- break
462
- box2 = [int(k) for k in box]
463
- y1 = int(box2[0] * scale_factor_h)
464
- x1 = int(box2[1] * scale_factor_w)
465
- y2 = int(box2[2] * scale_factor_h)
466
- x2 = int(box2[3] * scale_factor_w)
467
- rects.append(torch.tensor([y1, x1, y2, x2]))
468
- bbox = resized_image[:, y1:y2 + 1, x1:x2 + 1]
469
- bbox = transforms.Resize((64, 64))(bbox)
470
- boxes.append(bbox)
471
- boxes = torch.stack(boxes)
472
- pos = torch.stack(rects)
473
- neg_boxes = list()
474
- neg_rects = list()
475
- cnt = 0
476
- for box in neg_lines_boxes:
477
- cnt += 1
478
- if cnt > 3:
479
- break
480
- box2 = [int(k) for k in box]
481
- y1 = int(box2[0] * scale_factor_h)
482
- x1 = int(box2[1] * scale_factor_w)
483
- y2 = int(box2[2] * scale_factor_h)
484
- x2 = int(box2[3] * scale_factor_w)
485
- neg_rects.append(torch.tensor([y1, x1, y2, x2]))
486
- neg_bbox = resized_image[:, y1:y2 + 1, x1:x2 + 1]
487
- neg_bbox = transforms.Resize((64, 64))(neg_bbox)
488
- neg_boxes.append(neg_bbox)
489
- neg_boxes = torch.stack(neg_boxes)
490
- # boxes shape [3,3,64,64], image shape [3,384,384], density shape[384,384]
491
- sample = {'image': resized_image, 'boxes': boxes, 'neg_boxes': neg_boxes, 'pos': pos, 'gt_density': resized_density, 'm_flag': m_flag}
492
- return sample
493
-
494
-
495
- PreTrainNormalize = transforms.Compose([
496
- transforms.RandomResizedCrop(MAX_HW, scale=(0.2, 1.0), interpolation=3),
497
- transforms.RandomHorizontalFlip(),
498
- transforms.ToTensor(),
499
- # transforms.Normalize(mean=IM_NORM_MEAN, std=IM_NORM_STD)
500
- ])
501
-
502
- TTensor = transforms.Compose([
503
- transforms.ToTensor(),
504
- ])
505
-
506
- Augmentation = transforms.Compose([
507
- transforms.ColorJitter(brightness=0.25, contrast=0.15, saturation=0.15, hue=0.15),
508
- transforms.GaussianBlur(kernel_size=(7, 9))
509
- ])
510
-
511
- Normalize = transforms.Compose([
512
- transforms.ToTensor(),
513
- transforms.Normalize(mean=IM_NORM_MEAN, std=IM_NORM_STD)
514
- ])
515
-
516
-
517
- def transform_train(args: Namespace, do_aug=True):
518
- return transforms.Compose([ResizeTrainImage(args, MAX_HW, do_aug)])
519
-
520
- def transform_val(args: Namespace):
521
- return transforms.Compose([ResizeValImage(args, MAX_HW)])
522
-
523
- def transform_pre_train(args: Namespace):
524
- return transforms.Compose([ResizePreTrainImage(args, MAX_HW)])
 
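The transforms above turn point annotations into Gaussian density maps scaled by 60, so that summing the (rescaled) map recovers the object count. Below is a minimal, self-contained sketch of that step, assuming numpy, scipy and torch are available; dots_to_density and its arguments are illustrative names, not part of the repository.

import numpy as np
import torch
from scipy import ndimage

def dots_to_density(dots, h, w, sigma=1.0, scale=60.0):
    # Place a unit impulse at every annotated point, then smooth with a
    # Gaussian; the sum of the unscaled map approximates the object count.
    density = np.zeros((h, w), dtype='float32')
    for x, y in dots:
        density[min(h - 1, int(y)), min(w - 1, int(x))] = 1.0
    density = ndimage.gaussian_filter(density, sigma=sigma, order=0)
    return torch.from_numpy(density * scale)

# Three dots on a 384x384 canvas: dividing the sum by the same scale factor
# gives back a count close to 3.
dmap = dots_to_density([(20, 10), (150, 100), (300, 300)], 384, 384)
print(round(dmap.sum().item() / 60.0))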
VA-Count-main/VA-Count-main/util/__pycache__/FSC147.cpython-38.pyc DELETED
Binary file (15.3 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/FSC147.cpython-39.pyc DELETED
Binary file (14.4 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/FSC147_test.cpython-38.pyc DELETED
Binary file (16.6 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/lr_sched.cpython-38.pyc DELETED
Binary file (628 Bytes)
 
VA-Count-main/VA-Count-main/util/__pycache__/lr_sched.cpython-39.pyc DELETED
Binary file (628 Bytes)
 
VA-Count-main/VA-Count-main/util/__pycache__/misc.cpython-38.pyc DELETED
Binary file (19.5 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/misc.cpython-39.pyc DELETED
Binary file (19.4 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/pos_embed.cpython-38.pyc DELETED
Binary file (2.41 kB)
 
VA-Count-main/VA-Count-main/util/__pycache__/pos_embed.cpython-39.pyc DELETED
Binary file (2.39 kB)
 
VA-Count-main/VA-Count-main/util/crop.py DELETED
@@ -1,42 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- import math
8
-
9
- import torch
10
-
11
- from torchvision import transforms
12
- from torchvision.transforms import functional as F
13
-
14
-
15
- class RandomResizedCrop(transforms.RandomResizedCrop):
16
- """
17
- RandomResizedCrop for matching TF/TPU implementation: no for-loop is used.
18
- This may lead to results that differ from torchvision's version.
19
- Following BYOL's TF code:
20
- https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206
21
- """
22
- @staticmethod
23
- def get_params(img, scale, ratio):
24
- width, height = F._get_image_size(img)
25
- area = height * width
26
-
27
- target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
28
- log_ratio = torch.log(torch.tensor(ratio))
29
- aspect_ratio = torch.exp(
30
- torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
31
- ).item()
32
-
33
- w = int(round(math.sqrt(target_area * aspect_ratio)))
34
- h = int(round(math.sqrt(target_area / aspect_ratio)))
35
-
36
- w = min(w, width)
37
- h = min(h, height)
38
-
39
- i = torch.randint(0, height - h + 1, size=(1,)).item()
40
- j = torch.randint(0, width - w + 1, size=(1,)).item()
41
-
42
- return i, j, h, w
 
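The get_params override above samples the crop area and aspect ratio in a single shot instead of torchvision's retry loop. A usage sketch follows, assuming the RandomResizedCrop class above is importable and that the torchvision version still provides the private _get_image_size helper it relies on; image size and ranges are illustrative.

from PIL import Image
from torchvision.transforms import functional as F

img = Image.new('RGB', (640, 480))
i, j, h, w = RandomResizedCrop.get_params(img, scale=(0.2, 1.0), ratio=(3 / 4, 4 / 3))
crop = F.resized_crop(img, i, j, h, w, size=(224, 224))
print(crop.size)  # (224, 224)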
VA-Count-main/VA-Count-main/util/datasets.py DELETED
@@ -1,65 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- # --------------------------------------------------------
7
- # References:
8
- # DeiT: https://github.com/facebookresearch/deit
9
- # --------------------------------------------------------
10
-
11
- import os
12
- import PIL
13
-
14
- from torchvision import datasets, transforms
15
-
16
- from timm.data import create_transform
17
- from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
18
-
19
-
20
- def build_dataset(is_train, args):
21
- transform = build_transform(is_train, args)
22
-
23
- root = os.path.join(args.data_path, 'train' if is_train else 'val')
24
- dataset = datasets.ImageFolder(root, transform=transform)
25
-
26
- print(dataset)
27
-
28
- return dataset
29
-
30
-
31
- def build_transform(is_train, args):
32
- mean = IMAGENET_DEFAULT_MEAN
33
- std = IMAGENET_DEFAULT_STD
34
- # train transform
35
- if is_train:
36
- # this should always dispatch to transforms_imagenet_train
37
- transform = create_transform(
38
- input_size=args.input_size,
39
- is_training=True,
40
- color_jitter=args.color_jitter,
41
- auto_augment=args.aa,
42
- interpolation='bicubic',
43
- re_prob=args.reprob,
44
- re_mode=args.remode,
45
- re_count=args.recount,
46
- mean=mean,
47
- std=std,
48
- )
49
- return transform
50
-
51
- # eval transform
52
- t = []
53
- if args.input_size <= 224:
54
- crop_pct = 224 / 256
55
- else:
56
- crop_pct = 1.0
57
- size = int(args.input_size / crop_pct)
58
- t.append(
59
- transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images
60
- )
61
- t.append(transforms.CenterCrop(args.input_size))
62
-
63
- t.append(transforms.ToTensor())
64
- t.append(transforms.Normalize(mean, std))
65
- return transforms.Compose(t)
 
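The eval branch above resizes by a crop percentage before center-cropping. A small worked example with the default 224-pixel input (values purely illustrative):

input_size = 224
crop_pct = 224 / 256 if input_size <= 224 else 1.0   # 0.875 for 224-pixel inputs
resize_size = int(input_size / crop_pct)              # 256: shorter side before the 224x224 center crop
print(crop_pct, resize_size)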
VA-Count-main/VA-Count-main/util/lars.py DELETED
@@ -1,47 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- # --------------------------------------------------------
7
- # LARS optimizer, implementation from MoCo v3:
8
- # https://github.com/facebookresearch/moco-v3
9
- # --------------------------------------------------------
10
-
11
- import torch
12
-
13
-
14
- class LARS(torch.optim.Optimizer):
15
- """
16
- LARS optimizer, no rate scaling or weight decay for parameters <= 1D.
17
- """
18
- def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001):
19
- defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient)
20
- super().__init__(params, defaults)
21
-
22
- @torch.no_grad()
23
- def step(self):
24
- for g in self.param_groups:
25
- for p in g['params']:
26
- dp = p.grad
27
-
28
- if dp is None:
29
- continue
30
-
31
- if p.ndim > 1: # if not normalization gamma/beta or bias
32
- dp = dp.add(p, alpha=g['weight_decay'])
33
- param_norm = torch.norm(p)
34
- update_norm = torch.norm(dp)
35
- one = torch.ones_like(param_norm)
36
- q = torch.where(param_norm > 0.,
37
- torch.where(update_norm > 0,
38
- (g['trust_coefficient'] * param_norm / update_norm), one),
39
- one)
40
- dp = dp.mul(q)
41
-
42
- param_state = self.state[p]
43
- if 'mu' not in param_state:
44
- param_state['mu'] = torch.zeros_like(p)
45
- mu = param_state['mu']
46
- mu.mul_(g['momentum']).add_(dp)
47
- p.add_(mu, alpha=-g['lr'])
 
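A minimal usage sketch for the LARS optimizer above; hyperparameters are illustrative, and the LARS class is assumed to be imported from this module.

import torch

model = torch.nn.Linear(10, 10)
optimizer = LARS(model.parameters(), lr=0.1, weight_decay=1e-4, momentum=0.9)

loss = model(torch.randn(4, 10)).sum()
loss.backward()
optimizer.step()       # 2-D weights get the trust-ratio scaling; the 1-D bias does not
optimizer.zero_grad()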
VA-Count-main/VA-Count-main/util/lr_decay.py DELETED
@@ -1,76 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- # --------------------------------------------------------
7
- # References:
8
- # ELECTRA https://github.com/google-research/electra
9
- # BEiT: https://github.com/microsoft/unilm/tree/master/beit
10
- # --------------------------------------------------------
11
-
12
- import json
13
-
14
-
15
- def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75):
16
- """
17
- Parameter groups for layer-wise lr decay
18
- Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58
19
- """
20
- param_group_names = {}
21
- param_groups = {}
22
-
23
- num_layers = len(model.blocks) + 1
24
-
25
- layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1))
26
-
27
- for n, p in model.named_parameters():
28
- if not p.requires_grad:
29
- continue
30
-
31
- # no decay: all 1D parameters and model specific ones
32
- if p.ndim == 1 or n in no_weight_decay_list:
33
- g_decay = "no_decay"
34
- this_decay = 0.
35
- else:
36
- g_decay = "decay"
37
- this_decay = weight_decay
38
-
39
- layer_id = get_layer_id_for_vit(n, num_layers)
40
- group_name = "layer_%d_%s" % (layer_id, g_decay)
41
-
42
- if group_name not in param_group_names:
43
- this_scale = layer_scales[layer_id]
44
-
45
- param_group_names[group_name] = {
46
- "lr_scale": this_scale,
47
- "weight_decay": this_decay,
48
- "params": [],
49
- }
50
- param_groups[group_name] = {
51
- "lr_scale": this_scale,
52
- "weight_decay": this_decay,
53
- "params": [],
54
- }
55
-
56
- param_group_names[group_name]["params"].append(n)
57
- param_groups[group_name]["params"].append(p)
58
-
59
- # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2))
60
-
61
- return list(param_groups.values())
62
-
63
-
64
- def get_layer_id_for_vit(name, num_layers):
65
- """
66
- Assign a parameter with its layer id
67
- Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33
68
- """
69
- if name in ['cls_token', 'pos_embed']:
70
- return 0
71
- elif name.startswith('patch_embed'):
72
- return 0
73
- elif name.startswith('blocks'):
74
- return int(name.split('.')[1]) + 1
75
- else:
76
- return num_layers
 
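For a ViT-B encoder (12 blocks, so num_layers = 13) and the default layer_decay = 0.75, the scales computed above span roughly 0.024 for the patch embedding up to 1.0 for the head. A small numeric check, with illustrative values:

layer_decay, num_layers = 0.75, 13
layer_scales = [layer_decay ** (num_layers - i) for i in range(num_layers + 1)]
print(round(layer_scales[0], 3), layer_scales[-1])   # ~0.024 for layer 0, 1.0 for the last group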
VA-Count-main/VA-Count-main/util/lr_sched.py DELETED
@@ -1,21 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- import math
8
-
9
- def adjust_learning_rate(optimizer, epoch, args):
10
- """Decay the learning rate with half-cycle cosine after warmup"""
11
- if epoch < args.warmup_epochs:
12
- lr = args.lr * epoch / args.warmup_epochs
13
- else:
14
- lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \
15
- (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)))
16
- for param_group in optimizer.param_groups:
17
- if "lr_scale" in param_group:
18
- param_group["lr"] = lr * param_group["lr_scale"]
19
- else:
20
- param_group["lr"] = lr
21
- return lr
 
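A short sketch of the schedule above, linear warmup followed by a half-cycle cosine decay, evaluated at a few epochs. The SimpleNamespace stands in for the usual argparse args; values are illustrative.

import math
from types import SimpleNamespace

args = SimpleNamespace(lr=1e-3, min_lr=0.0, warmup_epochs=10, epochs=100)

def lr_at(epoch):
    if epoch < args.warmup_epochs:
        return args.lr * epoch / args.warmup_epochs
    return args.min_lr + (args.lr - args.min_lr) * 0.5 * (
        1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)))

print(lr_at(5), lr_at(10), lr_at(55), lr_at(100))   # mid-warmup, peak, half-decayed, ~min_lr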
VA-Count-main/VA-Count-main/util/misc.py DELETED
@@ -1,624 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- # --------------------------------------------------------
7
- # References:
8
- # DeiT: https://github.com/facebookresearch/deit
9
- # BEiT: https://github.com/microsoft/unilm/tree/master/beit
10
- # --------------------------------------------------------
11
-
12
- import builtins
13
- import datetime
14
- import os
15
- import time
16
- import json
17
- from collections import defaultdict, deque
18
- from pathlib import Path
19
- # from typing import Union
20
-
21
- import pandas as pd
22
- import torch
23
- import torch.distributed as dist
24
- import wandb
25
- # from torch._six import inf
26
- from torch import inf
27
- import matplotlib.pyplot as plt
28
- from torchvision import transforms
29
- import cv2
30
- from tqdm import tqdm
31
- from typing import Union, List
32
-
33
- class SmoothedValue(object):
34
- """Track a series of values and provide access to smoothed values over a
35
- window or the global series average.
36
- """
37
-
38
- def __init__(self, window_size=20, fmt=None):
39
- if fmt is None:
40
- fmt = "{median:.4f} ({global_avg:.4f})"
41
- self.deque = deque(maxlen=window_size)
42
- self.total = 0.0
43
- self.count = 0
44
- self.fmt = fmt
45
-
46
- def update(self, value, n=1):
47
- self.deque.append(value)
48
- self.count += n
49
- self.total += value * n
50
-
51
- def synchronize_between_processes(self):
52
- """
53
- Warning: does not synchronize the deque!
54
- """
55
- if not is_dist_avail_and_initialized():
56
- return
57
- t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
58
- dist.barrier()
59
- dist.all_reduce(t)
60
- t = t.tolist()
61
- self.count = int(t[0])
62
- self.total = t[1]
63
-
64
- @property
65
- def median(self):
66
- d = torch.tensor(list(self.deque))
67
- return d.median().item()
68
-
69
- @property
70
- def avg(self):
71
- d = torch.tensor(list(self.deque), dtype=torch.float32)
72
- return d.mean().item()
73
-
74
- @property
75
- def global_avg(self):
76
- if self.count == 0:
77
- # Return a default value or handle the zero count scenario
78
- return 0 # Or any other default value or handling mechanism
79
- else:
80
- return self.total / self.count
81
- # return self.total / self.count
82
-
83
- @property
84
- def max(self):
85
- return max(self.deque)
86
-
87
- @property
88
- def value(self):
89
- return self.deque[-1]
90
-
91
- def __str__(self):
92
- return self.fmt.format(
93
- median=self.median,
94
- avg=self.avg,
95
- global_avg=self.global_avg,
96
- max=self.max,
97
- value=self.value)
98
-
99
-
100
- class MetricLogger(object):
101
- def __init__(self, delimiter="\t"):
102
- self.meters = defaultdict(SmoothedValue)
103
- self.delimiter = delimiter
104
-
105
- def update(self, **kwargs):
106
- for k, v in kwargs.items():
107
- if v is None:
108
- continue
109
- if isinstance(v, torch.Tensor):
110
- v = v.item()
111
- assert isinstance(v, (float, int))
112
- self.meters[k].update(v)
113
-
114
- def __getattr__(self, attr):
115
- if attr in self.meters:
116
- return self.meters[attr]
117
- if attr in self.__dict__:
118
- return self.__dict__[attr]
119
- raise AttributeError("'{}' object has no attribute '{}'".format(
120
- type(self).__name__, attr))
121
-
122
- def __str__(self):
123
- loss_str = []
124
- for name, meter in self.meters.items():
125
- loss_str.append(
126
- "{}: {}".format(name, str(meter))
127
- )
128
- return self.delimiter.join(loss_str)
129
-
130
- def synchronize_between_processes(self):
131
- for meter in self.meters.values():
132
- meter.synchronize_between_processes()
133
-
134
- def add_meter(self, name, meter):
135
- self.meters[name] = meter
136
-
137
- def log_every(self, iterable, print_freq, header=None):
138
- i = 0
139
- if not header:
140
- header = ''
141
- start_time = time.time()
142
- end = time.time()
143
- iter_time = SmoothedValue(fmt='{avg:.4f}')
144
- data_time = SmoothedValue(fmt='{avg:.4f}')
145
- space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
146
- log_msg = [
147
- header,
148
- '[{0' + space_fmt + '}/{1}]',
149
- 'eta: {eta}',
150
- '{meters}',
151
- 'time: {time}',
152
- 'data: {data}'
153
- ]
154
- if torch.cuda.is_available():
155
- log_msg.append('max mem: {memory:.0f}')
156
- log_msg = self.delimiter.join(log_msg)
157
- MB = 1024.0 * 1024.0
158
- for obj in iterable:
159
- data_time.update(time.time() - end)
160
- yield obj
161
- iter_time.update(time.time() - end)
162
- if i % print_freq == 0 or i == len(iterable) - 1:
163
- eta_seconds = iter_time.global_avg * (len(iterable) - i)
164
- eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
165
- if torch.cuda.is_available():
166
- print(log_msg.format(
167
- i, len(iterable), eta=eta_string,
168
- meters=str(self),
169
- time=str(iter_time), data=str(data_time),
170
- memory=torch.cuda.max_memory_allocated() / MB))
171
- else:
172
- print(log_msg.format(
173
- i, len(iterable), eta=eta_string,
174
- meters=str(self),
175
- time=str(iter_time), data=str(data_time)))
176
- i += 1
177
- end = time.time()
178
- total_time = time.time() - start_time
179
- total_time_str = str(datetime.timedelta(seconds=int(total_time)))
180
- print('{} Total time: {} ({:.4f} s / it)'.format(
181
- header, total_time_str, total_time / len(iterable)))
182
-
183
-
184
- def setup_for_distributed(is_master):
185
- """
186
- This function disables printing when not in master process
187
- """
188
- builtin_print = builtins.print
189
-
190
- def print(*args, **kwargs):
191
- force = kwargs.pop('force', False)
192
- force = force or (get_world_size() > 8)
193
- if is_master or force:
194
- now = datetime.datetime.now().time()
195
- builtin_print('[{}] '.format(now), end='') # print with time stamp
196
- builtin_print(*args, **kwargs)
197
-
198
- builtins.print = print
199
-
200
-
201
- def is_dist_avail_and_initialized():
202
- if not dist.is_available():
203
- return False
204
- if not dist.is_initialized():
205
- return False
206
- return True
207
-
208
-
209
- def get_world_size():
210
- if not is_dist_avail_and_initialized():
211
- return 1
212
- return dist.get_world_size()
213
-
214
-
215
- def get_rank():
216
- if not is_dist_avail_and_initialized():
217
- return 0
218
- return dist.get_rank()
219
-
220
-
221
- def is_main_process():
222
- return get_rank() == 0
223
-
224
-
225
- def save_on_master(*args, **kwargs):
226
- if is_main_process():
227
- torch.save(*args, **kwargs)
228
-
229
-
230
- def init_distributed_mode(args):
231
- if args.dist_on_itp:
232
- args.rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
233
- args.world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
234
- args.gpu = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
235
- args.dist_url = "tcp://%s:%s" % (os.environ['MASTER_ADDR'], os.environ['MASTER_PORT'])
236
- os.environ['LOCAL_RANK'] = str(args.gpu)
237
- os.environ['RANK'] = str(args.rank)
238
- os.environ['WORLD_SIZE'] = str(args.world_size)
239
- # ["RANK", "WORLD_SIZE", "MASTER_ADDR", "MASTER_PORT", "LOCAL_RANK"]
240
- elif 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
241
- args.rank = int(os.environ["RANK"])
242
- args.world_size = int(os.environ['WORLD_SIZE'])
243
- args.gpu = int(os.environ['LOCAL_RANK'])
244
- elif 'SLURM_PROCID' in os.environ:
245
- args.rank = int(os.environ['SLURM_PROCID'])
246
- args.gpu = args.rank % torch.cuda.device_count()
247
- else:
248
- print('Not using distributed mode')
249
- setup_for_distributed(is_master=True) # hack
250
- args.distributed = False
251
- return
252
-
253
- args.distributed = True
254
-
255
- torch.cuda.set_device(args.gpu)
256
- args.dist_backend = 'nccl'
257
- print('| distributed init (rank {}): {}, gpu {}'.format(
258
- args.rank, args.dist_url, args.gpu), flush=True)
259
- torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
260
- world_size=args.world_size, rank=args.rank)
261
- torch.distributed.barrier()
262
- setup_for_distributed(args.rank == 0)
263
-
264
-
265
- class NativeScalerWithGradNormCount:
266
- state_dict_key = "amp_scaler"
267
-
268
- def __init__(self):
269
- self._scaler = torch.cuda.amp.GradScaler()
270
-
271
- def __call__(self, loss, optimizer, clip_grad=None, parameters=None, create_graph=False, update_grad=True):
272
- self._scaler.scale(loss).backward(create_graph=create_graph)
273
- if update_grad:
274
- if clip_grad is not None:
275
- assert parameters is not None
276
- self._scaler.unscale_(optimizer) # unscale the gradients of optimizer's assigned params in-place
277
- norm = torch.nn.utils.clip_grad_norm_(parameters, clip_grad)
278
- else:
279
- self._scaler.unscale_(optimizer)
280
- norm = get_grad_norm_(parameters)
281
- self._scaler.step(optimizer)
282
- self._scaler.update()
283
- else:
284
- norm = None
285
- return norm
286
-
287
- def state_dict(self):
288
- return self._scaler.state_dict()
289
-
290
- def load_state_dict(self, state_dict):
291
- self._scaler.load_state_dict(state_dict)
292
-
293
-
294
- def get_grad_norm_(parameters, norm_type: float = 2.0) -> torch.Tensor:
295
- if isinstance(parameters, torch.Tensor):
296
- parameters = [parameters]
297
- parameters = [p for p in parameters if p.grad is not None]
298
- norm_type = float(norm_type)
299
- if len(parameters) == 0:
300
- return torch.tensor(0.)
301
- device = parameters[0].grad.device
302
- if norm_type == inf:
303
- total_norm = max(p.grad.detach().abs().max().to(device) for p in parameters)
304
- else:
305
- total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]), norm_type)
306
- return total_norm
307
-
308
-
309
- def save_model(args, epoch, model, model_without_ddp, optimizer, loss_scaler, suffix="", upload=True):
310
- if suffix:
311
- suffix = f"__{suffix}"
312
- output_dir = Path(args.output_dir)
313
- ckpt_name = f"checkpoint{suffix}.pth"
314
- if loss_scaler is not None:
315
- checkpoint_paths = [output_dir / ckpt_name]
316
- for checkpoint_path in checkpoint_paths:
317
- to_save = {
318
- 'model': model_without_ddp.state_dict(),
319
- 'optimizer': optimizer.state_dict(),
320
- 'epoch': epoch,
321
- 'scaler': loss_scaler.state_dict(),
322
- 'args': args,
323
- }
324
- save_on_master(to_save, checkpoint_path)
325
- if upload and is_main_process():
326
- log_wandb_model(f"checkpoint{suffix}", checkpoint_path, epoch)
327
- print("checkpoint sent to W&B (if)")
328
- else:
329
- client_state = {'epoch': epoch}
330
- model.save_checkpoint(save_dir=args.output_dir, tag=ckpt_name, client_state=client_state)
331
- if upload and is_main_process():
332
- log_wandb_model(f"checkpoint{suffix}", output_dir / ckpt_name, epoch)
333
- print("checkpoint sent to W&B (else)")
334
-
335
-
336
- def log_wandb_model(title, path, epoch):
337
- artifact = wandb.Artifact(title, type="model")
338
- artifact.add_file(path)
339
- artifact.metadata["epoch"] = epoch
340
- wandb.log_artifact(artifact_or_path=artifact, name=title)
341
-
342
-
343
- def load_model(args, model_without_ddp, optimizer, loss_scaler):
344
- if args.resume:
345
- if args.resume.startswith('https'):
346
- checkpoint = torch.hub.load_state_dict_from_url(
347
- args.resume, map_location='cpu', check_hash=True)
348
- else:
349
- checkpoint = torch.load(args.resume, map_location='cpu')
350
-
351
- if 'pos_embed' in checkpoint['model'] and checkpoint['model']['pos_embed'].shape != model_without_ddp.state_dict()['pos_embed'].shape:
352
- print(f"Removing key pos_embed from pretrained checkpoint")
353
- del checkpoint['model']['pos_embed']
354
-
355
- if 'decoder_pos_embed' in checkpoint['model'] and checkpoint['model']['decoder_pos_embed'].shape != model_without_ddp.state_dict()['decoder_pos_embed'].shape:
356
- print(f"Removing key decoder_pos_embed from pretrained checkpoint")
357
- del checkpoint['model']['decoder_pos_embed']
358
-
359
- model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
360
- print("Resume checkpoint %s" % args.resume)
361
- if 'optimizer' in checkpoint and 'epoch' in checkpoint and not (hasattr(args, 'eval') and args.eval):
362
- optimizer.load_state_dict(checkpoint['optimizer'])
363
- args.start_epoch = checkpoint['epoch'] + 1
364
- if 'scaler' in checkpoint:
365
- loss_scaler.load_state_dict(checkpoint['scaler'])
366
- print("With optim & sched!")
367
-
368
- def load_model_FSC(args, model_without_ddp):
369
- if args.resume:
370
- if args.resume.startswith('https'):
371
- checkpoint = torch.hub.load_state_dict_from_url(
372
- args.resume, map_location='cpu', check_hash=True)
373
- else:
374
- checkpoint = torch.load(args.resume, map_location='cpu')
375
-
376
- if 'pos_embed' in checkpoint['model'] and checkpoint['model']['pos_embed'].shape != model_without_ddp.state_dict()['pos_embed'].shape:
377
- print(f"Removing key pos_embed from pretrained checkpoint")
378
- del checkpoint['model']['pos_embed']
379
-
380
- model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
381
- print(f"Resume checkpoint {args.resume} ({checkpoint['epoch']})")
382
-
383
- def load_model_FSC1(args, model_without_ddp):
384
- if args.resume:
385
- if args.resume.startswith('https'):
386
- checkpoint = torch.hub.load_state_dict_from_url(
387
- args.resume, map_location='cpu', check_hash=True)
388
- else:
389
- checkpoint = torch.load(args.resume, map_location='cpu')
390
- #model = timm.create_model('vit_base_patch16_224', pretrained=True)
391
- #torch.save(model.state_dict(), './output_abnopre_dir/checkpoint-6657.pth')
392
- checkpoint1 = torch.load('./output_abnopre_dir/checkpoint-6657.pth', map_location='cpu')
393
-
394
- if 'pos_embed' in checkpoint['model'] and checkpoint['model']['pos_embed'].shape != model_without_ddp.state_dict()['pos_embed'].shape:
395
- print(f"Removing key pos_embed from pretrained checkpoint")
396
- del checkpoint['model']['pos_embed']
397
-
398
- del checkpoint1['cls_token'],checkpoint1['pos_embed']
399
-
400
- model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
401
- model_without_ddp.load_state_dict(checkpoint1, strict=False)
402
- print("Resume checkpoint %s" % args.resume)
403
-
404
-
405
- def load_model_FSC_full(args, model_without_ddp, optimizer, loss_scaler):
406
- if args.resume:
407
- if args.resume.startswith('https'):
408
- checkpoint = torch.hub.load_state_dict_from_url(
409
- args.resume, map_location='cpu', check_hash=True)
410
- else:
411
- checkpoint = torch.load(args.resume, map_location='cpu')
412
-
413
- if 'pos_embed' in checkpoint['model'] and checkpoint['model']['pos_embed'].shape != \
414
- model_without_ddp.state_dict()['pos_embed'].shape:
415
- print(f"Removing key pos_embed from pretrained checkpoint")
416
- del checkpoint['model']['pos_embed']
417
-
418
- model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
419
- print("Resume checkpoint %s" % args.resume)
420
-
421
- if 'optimizer' in checkpoint and 'epoch' in checkpoint and args.do_resume:
422
- optimizer.load_state_dict(checkpoint['optimizer'])
423
- args.start_epoch = checkpoint['epoch'] + 1
424
- if 'scaler' in checkpoint:
425
- loss_scaler.load_state_dict(checkpoint['scaler'])
426
- print("With optim & scheduler!")
427
-
428
-
429
- def all_reduce_mean(x):
430
- world_size = get_world_size()
431
- if world_size > 1:
432
- x_reduce = torch.tensor(x).cuda()
433
- dist.all_reduce(x_reduce)
434
- x_reduce /= world_size
435
- return x_reduce.item()
436
- else:
437
- return x
438
-
439
-
440
- def plot_counts(res_csv: Union[str, List[str]], output_dir: str, suffix: str = "", smooth: bool = False):
441
- if suffix:
442
- suffix = f"_{suffix}"
443
- if smooth:
444
- suffix = f"_smooth{suffix}"
445
- if type(res_csv) == str:
446
- res_csv = [res_csv]
447
-
448
- plt.figure(figsize=(15, 5))
449
-
450
- for res in res_csv:
451
- name = Path(res).parent.name
452
- df = pd.read_csv(res)
453
- print(df)
454
-
455
- df.sort_values(by="name", inplace=True)
456
- df.reset_index(drop=True, inplace=True)
457
- df.index += 1
458
- print(df)
459
-
460
- if smooth:
461
- time_arr = df.index[5:-5]
462
- smooth_pred_mean = df['prediction'].iloc[5:-5].rolling(25).mean()
463
- smooth_pred_std = df['prediction'].iloc[5:-5].rolling(25).std()
464
- plt.plot(time_arr, smooth_pred_mean, label=name)
465
- plt.fill_between(time_arr, smooth_pred_mean + smooth_pred_std, smooth_pred_mean - smooth_pred_std, alpha=.2)
466
- plt.xlabel('Frame')
467
- plt.ylabel('Count')
468
- else:
469
- plt.plot(df.index, df['prediction'], label=name)
470
-
471
- plt.legend()
472
- plt.savefig(os.path.join(output_dir, f'counts{suffix}.png'), dpi=300)
473
-
474
-
475
- def write_zeroshot_annotations(p: Path):
476
- with open(p / 'annotations.json', 'a') as split:
477
- split.write('{\n')
478
- for img in p.iterdir():
479
- if img.is_file():
480
- split.write(f' "{img.name}": {{\n' \
481
- ' "H": 960,\n' \
482
- ' "W": 1280,\n' \
483
- ' "box_examples_coordinates": [],\n' \
484
- ' "points": []\n' \
485
- ' },\n')
486
- split.write("}")
487
-
488
- with open(p / 'split.json', 'a') as split:
489
- split.write('{\n "test":\n [\n')
490
- for img in p.iterdir():
491
- if img.is_file():
492
- split.write(f' "{img.name}",\n')
493
- split.write(" ]\n}")
494
-
495
-
496
- def make_grid(imgs, h, w):
497
- assert len(imgs) == 9
498
- rows = []
499
- for i in range(0, 9, 3):
500
- row = torch.cat((imgs[i], imgs[i + 1], imgs[i + 2]), -1)
501
- rows += [row]
502
- grid = torch.cat((rows[0], rows[1], rows[2]), 0)
503
- grid = transforms.Resize((h, w))(grid.unsqueeze(0))
504
- return grid.squeeze(0)
505
-
506
-
507
- def min_max(t):
508
- t_shape = t.shape
509
- t = t.view(t_shape[0], -1)
510
- t -= t.min(1, keepdim=True)[0]
511
- t /= t.max(1, keepdim=True)[0]
512
- t = t.view(*t_shape)
513
- return t
514
-
515
-
516
- def min_max_np(v, new_min=0, new_max=1):
517
- v_min, v_max = v.min(), v.max()
518
- return (v - v_min) / (v_max - v_min) * (new_max - new_min) + new_min
519
-
520
-
521
- def get_box_map(sample, pos, device, external=False):
522
- box_map = torch.zeros([sample.shape[1], sample.shape[2]], device=device)
523
- if external is False:
524
- for rect in pos:
525
- for i in range(rect[2] - rect[0]):
526
- box_map[min(rect[0] + i, sample.shape[1] - 1), min(rect[1], sample.shape[2] - 1)] = 10
527
- box_map[min(rect[0] + i, sample.shape[1] - 1), min(rect[3], sample.shape[2] - 1)] = 10
528
- for i in range(rect[3] - rect[1]):
529
- box_map[min(rect[0], sample.shape[1] - 1), min(rect[1] + i, sample.shape[2] - 1)] = 10
530
- box_map[min(rect[2], sample.shape[1] - 1), min(rect[1] + i, sample.shape[2] - 1)] = 10
531
- box_map = box_map.unsqueeze(0).repeat(3, 1, 1)
532
- return box_map
533
-
534
-
535
- timerfunc = time.perf_counter
536
-
537
- class measure_time(object):
538
- def __enter__(self):
539
- self.start = timerfunc()
540
- return self
541
-
542
- def __exit__(self, typ, value, traceback):
543
- self.duration = timerfunc() - self.start
544
-
545
- def __add__(self, other):
546
- return self.duration + other.duration
547
-
548
- def __sub__(self, other):
549
- return self.duration - other.duration
550
-
551
- def __str__(self):
552
- return str(self.duration)
553
-
554
-
555
- def log_test_results(test_dir):
556
- test_dir = Path(test_dir)
557
- logs = []
558
- for d in test_dir.iterdir():
559
- if d.is_dir() and (d / "log.txt").exists():
560
- print(d.name)
561
- with open(d / "log.txt") as f:
562
- last = f.readlines()[-1]
563
- j = json.loads(last)
564
- j['name'] = d.name
565
- logs.append(j)
566
- df = pd.DataFrame(logs)
567
-
568
- df.sort_values('name', inplace=True, ignore_index=True)
569
- cols = list(df.columns)
570
- cols = cols[-1:] + cols[:-1]
571
- df = df[cols]
572
-
573
- df.to_csv(test_dir / "logs.csv", index=False)
574
-
575
-
576
- COLORS = {
577
- 'muted blue': '#1f77b4',
578
- 'safety orange': '#ff7f0e',
579
- 'cooked asparagus green': '#2ca02c',
580
- 'brick red': '#d62728',
581
- 'muted purple': '#9467bd',
582
- 'chestnut brown': '#8c564b',
583
- 'raspberry yogurt pink': '#e377c2',
584
- 'middle gray': '#7f7f7f',
585
- 'curry yellow-green': '#bcbd22',
586
- 'blue-teal': '#17becf',
587
- 'muted blue light': '#419ede',
588
- 'safety orange light': '#ffa85b',
589
- 'cooked asparagus green light': '#4bce4b',
590
- 'brick red light': '#e36667'
591
- }
592
-
593
-
594
- def plot_test_results(test_dir):
595
- import plotly.graph_objects as go
596
-
597
- test_dir = Path(test_dir)
598
- df = pd.read_csv(test_dir / "logs.csv")
599
- df.sort_values('name', inplace=True)
600
-
601
- fig = go.Figure()
602
- fig.add_trace(go.Scatter(x=df['name'], y=df['MAE'], line_color=COLORS['muted blue'],
603
- mode='lines', name='MAE'))
604
- fig.add_trace(go.Scatter(x=df['name'], y=df['RMSE'], line_color=COLORS['safety orange'],
605
- mode='lines', name='RMSE'))
606
- fig.add_trace(go.Scatter(x=df['name'], y=df['NAE'], line_color=COLORS['cooked asparagus green'],
607
- mode='lines', name='NAE'))
608
-
609
- fig.update_yaxes(type="log")
610
- fig.write_image(test_dir / "plot.jpeg", scale=4)
611
- fig.write_html(test_dir / "plot.html", auto_open=False)
612
-
613
-
614
- def frames2vid(input_dir: str, output_file: str, pattern: str, fps: int, h=720, w=1280):
615
- input_dir = Path(input_dir)
616
- video_file = None
617
- files = sorted(input_dir.glob(pattern))
618
- video_file = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
619
- for img in tqdm(files, total=len(files)):
620
- frame = cv2.imread(str(img))
621
- frame = cv2.resize(frame, (w, h))
622
- video_file.write(frame)
623
-
624
- video_file.release()
 
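A minimal usage sketch for the SmoothedValue/MetricLogger helpers above, run in a single process with no distributed synchronization; MetricLogger is assumed to be imported from this module and the printed numbers are illustrative.

logger = MetricLogger(delimiter="  ")
for step in range(100):
    logger.update(loss=1.0 / (step + 1), lr=1e-4)

print(str(logger))               # per-meter "median (global_avg)" strings
print(logger.loss.global_avg)    # running average over all 100 updates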
VA-Count-main/VA-Count-main/util/pos_embed.py DELETED
@@ -1,97 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- # --------------------------------------------------------
7
- # Position embedding utils
8
- # --------------------------------------------------------
9
-
10
- import numpy as np
11
-
12
- import torch
13
-
14
- # --------------------------------------------------------
15
- # 2D sine-cosine position embedding
16
- # References:
17
- # Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
18
- # MoCo v3: https://github.com/facebookresearch/moco-v3
19
- # --------------------------------------------------------
20
- def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
21
- """
22
- grid_size: int of the grid height and width
23
- return:
24
- pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
25
- """
26
- grid_h = np.arange(grid_size, dtype=np.float32)
27
- grid_w = np.arange(grid_size, dtype=np.float32)
28
- grid = np.meshgrid(grid_w, grid_h) # here w goes first
29
- grid = np.stack(grid, axis=0)
30
-
31
- grid = grid.reshape([2, 1, grid_size, grid_size])
32
- pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
33
- if cls_token:
34
- pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
35
- return pos_embed
36
-
37
-
38
- def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
39
- assert embed_dim % 2 == 0
40
-
41
- # use half of dimensions to encode grid_h
42
- emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2)
43
- emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2)
44
-
45
- emb = np.concatenate([emb_h, emb_w], axis=1) # (H*W, D)
46
- return emb
47
-
48
-
49
- def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
50
- """
51
- embed_dim: output dimension for each position
52
- pos: a list of positions to be encoded: size (M,)
53
- out: (M, D)
54
- """
55
- assert embed_dim % 2 == 0
56
- # omega = np.arange(embed_dim // 2, dtype=np.float)
57
- omega = np.arange(embed_dim // 2, dtype=np.float32)
58
- omega /= embed_dim / 2.
59
- omega = 1. / 10000**omega # (D/2,)
60
-
61
- pos = pos.reshape(-1) # (M,)
62
- out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product
63
-
64
- emb_sin = np.sin(out) # (M, D/2)
65
- emb_cos = np.cos(out) # (M, D/2)
66
-
67
- emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)
68
- return emb
69
-
70
-
71
- # --------------------------------------------------------
72
- # Interpolate position embeddings for high-resolution
73
- # References:
74
- # DeiT: https://github.com/facebookresearch/deit
75
- # --------------------------------------------------------
76
- def interpolate_pos_embed(model, checkpoint_model):
77
- if 'pos_embed' in checkpoint_model:
78
- pos_embed_checkpoint = checkpoint_model['pos_embed']
79
- embedding_size = pos_embed_checkpoint.shape[-1]
80
- num_patches = model.patch_embed.num_patches
81
- num_extra_tokens = model.pos_embed.shape[-2] - num_patches
82
- # height (== width) for the checkpoint position embedding
83
- orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
84
- # height (== width) for the new position embedding
85
- new_size = int(num_patches ** 0.5)
86
- # class_token and dist_token are kept unchanged
87
- if orig_size != new_size:
88
- print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
89
- extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
90
- # only the position tokens are interpolated
91
- pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
92
- pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
93
- pos_tokens = torch.nn.functional.interpolate(
94
- pos_tokens, size=(new_size, new_size), mode='bicubic', align_corners=False)
95
- pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
96
- new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
97
- checkpoint_model['pos_embed'] = new_pos_embed
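A quick sketch of generating the fixed embedding above for a 384x384 input with 16x16 patches (a 24x24 token grid) and a ViT-B width of 768; get_2d_sincos_pos_embed is assumed to be imported from this module.

pos_embed = get_2d_sincos_pos_embed(768, 24, cls_token=True)
print(pos_embed.shape)   # (1 + 24 * 24, 768) = (577, 768)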