# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Modified by Xueyan Zou ([email protected])
# --------------------------------------------------------
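"""Training and evaluation pipeline for X-Decoder.

XDecoderPipeline plugs into the generic DefaultTrainer: it builds the model,
provides (and caches) the train/eval dataloaders, runs a single optimization
step, and drives per-dataset evaluation.
"""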
import logging
import time
import datetime
import json
import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from typing import Tuple, Dict, List, Union
from infinibatch import iterators

from trainer.default_trainer import DefaultTrainer

from detectron2.evaluation import inference_on_dataset
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import MetadataCatalog

from modeling import build_model
from modeling.utils import get_class_names
from modeling.BaseModel import BaseModel
from datasets import build_evaluator, build_eval_dataloader, build_train_dataloader
from utilities.distributed import is_main_process
from utilities.constants import COCO_PANOPTIC_CLASSES
from trainer.utils.misc import move_batch_to_device, cast_batch_to_half

from .utils.misc import hook_metadata, hook_switcher, hook_opt

logger = logging.getLogger(__name__)


class XDecoderPipeline:
    def __init__(self, opt):
        self._opt = opt
        print(self._opt['RESUME_FROM'])
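
    # Build the raw model, switch it to train mode, and wrap it in BaseModel so
    # the trainer can handle it uniformly; the result is a dict keyed by model
    # name ('default'). The full module graph is logged on the main process.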
    def initialize_model(self):
        model_name = "default"
        model = build_model(self._opt)
        model.train()

        if is_main_process():
            logger.info(model)

        raw_models = {model_name: BaseModel(self._opt, model)}
        return raw_models
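
    # Lazily build and cache the dataloaders. For evaluation, the list of TEST
    # loaders is built once and indexed by dataset_label, and a matching
    # evaluator is rebuilt into self.evaluator. For training, the loader is
    # built once and its length is used to derive steps_update_per_epoch for
    # the LR scheduler (a temporary workaround, per the inline note below).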
    def get_dataloaders(
        self, trainer: DefaultTrainer,
        dataset_label: str,
        is_evaluation: bool
    ) -> Union[DataLoader, iterators.CheckpointableIterator]:
        distributed = self._opt['world_size'] > 1

        if is_evaluation:
            if not hasattr(self, 'valid_loader'):
                dataloaders = build_eval_dataloader(self._opt)
                self.valid_loader = dataloaders
            else:
                dataloaders = self.valid_loader
            idx = 0 if dataset_label == 'dev' else self._opt['DATASETS']['TEST'].index(dataset_label)
            dataloader = dataloaders[idx]
            self.evaluator = build_evaluator(self._opt, self._opt['DATASETS']['TEST'][idx], self._opt['SAVE_DIR'])
        else:
            if not hasattr(self, 'train_loader'):
                dataloader = build_train_dataloader(self._opt)
                self.train_loader = dataloader
                logger.info(f'num of train samples: {len(dataloader)}')
            else:
                dataloader = self.train_loader

            # temp solution for lr scheduler
            steps_total = len(self.train_loader)
            steps_acc = self._opt['GRADIENT_ACCUMULATE_STEP']
            steps_update = steps_total // steps_acc
            self._opt["LR_SCHEDULER_PARAMS"]["steps_update_per_epoch"] = steps_update
        return dataloader
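
    # Loss hook handed to trainer.compute_loss: run the 'default' model on a
    # batch and return its loss dict. Declared static so it can be called as a
    # plain (trainer, batch) function rather than a bound method.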
    @staticmethod
    def forward_func(trainer, batch):
        loss = trainer.models['default'](batch)
        return loss
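
    # One optimization step: move the batch to the device (casting to half
    # precision under FP16/DeepSpeed), compute the loss dict, sum it, run the
    # backward pass, and update the 'default' model. Returns per-loss scalar
    # values and the sample count for logging.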
    def forward_step(
        self,
        trainer: DefaultTrainer,
        batch,
        grad_acc_batches: List,
        grad_acc_index: int,
        is_distributed: bool,
    ) -> Tuple[Dict[str, float], Dict[str, int], Dict]:
        loss_info, sample_size_info, extra_info = {}, {}, {}
        batch = move_batch_to_device(batch, self._opt['device'])
        if self._opt['FP16']:
            # in FP16 mode, DeepSpeed casts the model to FP16, so the input needs to be manually cast to FP16
            batch = cast_batch_to_half(batch)

        loss = trainer.compute_loss(self.forward_func, batch)
        loss_info = {k: v.detach().item() for k, v in loss.items()}
        sample_size_info = {'num_samples': len(batch)}
        loss = sum(loss for loss in loss.values())
        trainer.backward_loss(loss, model_names=['default'])
        trainer.update_model(model_name='default')
        return loss_info, sample_size_info, extra_info
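
    # Evaluate on every dataset in DATASETS.TEST: swap in the dataset's metadata
    # and class-name text embeddings, run inference with detectron2-style warmup
    # timing, collect evaluator results, then restore the training-time metadata
    # and class count. On the main process the scores are also written to
    # <save_folder>/<checkpoint_name>_eval_results.json.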
    def evaluate_model(
        self,
        trainer: DefaultTrainer,
        save_folder,
    ) -> Tuple[Dict, Dict[str, float], bool]:
        model = trainer.raw_models['default'].eval()
        self._opt = hook_opt(self._opt)
        dataset_names = self._opt['DATASETS']['TEST']
        scores = {}
        summary = {}

        for dataset_label in dataset_names:
            torch.cuda.empty_cache()
            eval_batch_gen = self.get_dataloaders(trainer, dataset_label, is_evaluation=True)
            self.evaluator.reset()
            with torch.no_grad():
                names = get_class_names(dataset_label)
                if self._opt['MODEL']['ENCODER']['BINARY_CLASSES']:
                    names = ['target', 'background']
                model.model.metadata = MetadataCatalog.get(dataset_label)
                model.model.metadata = hook_metadata(model.model.metadata, dataset_label)
                eval_type = model.model.metadata.evaluator_type
                if 'background' in names:
                    model.model.sem_seg_head.num_classes = len(names) - 1
                model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(names, is_eval=True)
                hook_switcher(model, dataset_label)
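
                # Timing bookkeeping mirrors detectron2's inference_on_dataset:
                # the first `num_warmup` iterations are excluded from the
                # reported per-iteration data/compute/eval averages.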
                total = len(eval_batch_gen)
                num_warmup = min(5, total - 1)
                start_time = time.perf_counter()
                total_data_time = 0
                total_compute_time = 0
                total_eval_time = 0
                start_data_time = time.perf_counter()

                for idx, batch in enumerate(eval_batch_gen):
                    total_data_time += time.perf_counter() - start_data_time
                    if idx == num_warmup:
                        start_time = time.perf_counter()
                        total_data_time = 0
                        total_compute_time = 0
                        total_eval_time = 0
                    start_compute_time = time.perf_counter()

                    batch = move_batch_to_device(batch, self._opt['device'])
                    if self._opt['FP16']:
                        # in FP16 mode, DeepSpeed casts the model to FP16, so the input needs to be manually cast to FP16
                        batch = cast_batch_to_half(batch)
                    outputs = model(batch, mode=eval_type)

                    if torch.cuda.is_available():
                        torch.cuda.synchronize()
                    total_compute_time += time.perf_counter() - start_compute_time

                    start_eval_time = time.perf_counter()
                    self.evaluator.process(batch, outputs)
                    total_eval_time += time.perf_counter() - start_eval_time

                    iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
                    data_seconds_per_iter = total_data_time / iters_after_start
                    compute_seconds_per_iter = total_compute_time / iters_after_start
                    eval_seconds_per_iter = total_eval_time / iters_after_start
                    total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start

                    if is_main_process() and (idx >= num_warmup * 2 or compute_seconds_per_iter > 5):
                        eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
                        log_every_n_seconds(
                            logging.INFO,
                            (
                                f"Task {dataset_label}. "
                                f"Inference done {idx + 1}/{total}. "
                                f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
                                f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
                                f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
                                f"Total: {total_seconds_per_iter:.4f} s/iter. "
                                f"ETA={eta}"
                            ),
                            n=5,
                        )
                    start_data_time = time.perf_counter()
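
            # Aggregate this dataset's predictions and drop the temporary text
            # embeddings so the next dataset (or training) starts clean.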
            results = self.evaluator.evaluate()
            model.model.sem_seg_head.predictor.lang_encoder.reset_text_embeddings()

            if is_main_process():
                scores["{}/{}".format(dataset_label, eval_type)] = results

        # set back to the training state
        model.model.sem_seg_head.num_classes = self._opt['MODEL']['ENCODER']['NUM_CLASSES']
        model.model.metadata = MetadataCatalog.get(self._opt['DATASETS']['TRAIN'][0])

        # save scores
        if is_main_process():
            model_name = self._opt['RESUME_FROM'].split('/')[-1].split('.')[0]
            with open(os.path.join(save_folder, f'{model_name}_eval_results.json'), 'w') as f:
                json.dump(scores, f, indent=4)

        # TODO: hack to return only the numeric scores
        for datatype in scores:
            for evaltype in scores[datatype]:
                if 'instance_results' in scores[datatype][evaltype]:
                    scores[datatype][evaltype] = scores[datatype][evaltype]['scores']
        return scores
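

# ------------------------------------------------------------------
# Rough usage sketch (illustrative only; the real call sequence lives in
# trainer.default_trainer.DefaultTrainer, and the 'train' label below is a
# placeholder -- it is ignored when is_evaluation=False):
#
#   pipeline = XDecoderPipeline(opt)
#   raw_models = pipeline.initialize_model()          # {'default': BaseModel(...)}
#   train_loader = pipeline.get_dataloaders(trainer, 'train', is_evaluation=False)
#   loss_info, sizes, extra = pipeline.forward_step(
#       trainer, batch, grad_acc_batches=[], grad_acc_index=0, is_distributed=False)
#   scores = pipeline.evaluate_model(trainer, save_folder=opt['SAVE_DIR'])
# ------------------------------------------------------------------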