diff --git a/docs/source/experiments/cifar.rst b/docs/source/experiments/cifar.rst index 34bd16ce..5e057720 100644 --- a/docs/source/experiments/cifar.rst +++ b/docs/source/experiments/cifar.rst @@ -57,19 +57,6 @@ Train Your Own Model --eval evaluating -Extending the Software ----------------------- - -This code is well written, easy to use and extendable for your own models or datasets: - -- Write your own Dataloader ``mydataset.py`` to ``dataset/`` folder - -- Write your own Model ``mymodel.py`` to ``model/`` folder - -- Run the program:: - - python main.py --dataset mydataset --model mymodel - Citation -------- diff --git a/docs/source/experiments/segmentation.rst b/docs/source/experiments/segmentation.rst index 16ac660d..7d1e2a60 100644 --- a/docs/source/experiments/segmentation.rst +++ b/docs/source/experiments/segmentation.rst @@ -38,25 +38,19 @@ Test Pre-trained Model .. role:: raw-html(raw) :format: html -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ -| Model | pixAcc | mIoU | Command | Logs | -+==================================+===========+===========+==============================================================================================+============+ -| Encnet_ResNet50_PContext | 79.2% | 51.0% | :raw-html:`cmd` | ENC50PC_ | -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ -| EncNet_ResNet101_PContext | 80.7% | 54.1% | :raw-html:`cmd` | ENC101PC_ | -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ -| EncNet_ResNet50_ADE | 80.1% | 41.5% | :raw-html:`cmd` | ENC50ADE_ | -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ -| EncNet_ResNet101_ADE | 81.3% | 44.4% | :raw-html:`cmd` | ENC101ADE_ | -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ -| EncNet_ResNet101_VOC | N/A | 85.9% | :raw-html:`cmd` | ENC101VOC_ | -+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+ - -.. _ENC50PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet50_pcontext.log?raw=true -.. _ENC101PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_pcontext.log?raw=true -.. _ENC50ADE: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet50_ade.log?raw=true -.. _ENC101ADE: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_ade.log?raw=true -.. 
_ENC101VOC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_voc.log?raw=true ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ +| Model | pixAcc | mIoU | Command | ++==================================+===========+===========+==============================================================================================+ +| Encnet_ResNet50_PContext | 79.2% | 51.0% | :raw-html:`cmd` | ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ +| EncNet_ResNet101_PContext | 80.7% | 54.1% | :raw-html:`cmd` | ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ +| EncNet_ResNet50_ADE | 80.1% | 41.5% | :raw-html:`cmd` | ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ +| EncNet_ResNet101_ADE | 81.3% | 44.4% | :raw-html:`cmd` | ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ +| EncNet_ResNet101_VOC | N/A | 85.9% | :raw-html:`cmd` | ++----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+ .. raw:: html diff --git a/docs/source/experiments/texture.rst b/docs/source/experiments/texture.rst index 769c2a49..1278c429 100644 --- a/docs/source/experiments/texture.rst +++ b/docs/source/experiments/texture.rst @@ -22,16 +22,11 @@ Test Pre-trained Model cd PyTorch-Encoding/ python scripts/prepare_minc.py -- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set):: +- Test pre-trained model on MINC-2500. 
The pre-trained weight will be automatically downloaded (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set):: - cd experiments/recognition - python model/download_models.py - -- Test pre-trained model on MINC-2500:: - - python main.py --dataset minc --model deepten --nclass 23 --resume deepten_minc.pth --eval + python main.py --dataset minc --model deepten_resnet50_minc --nclass 23 --pretrained --eval # Terminal Output: - # Loss: 1.005 | Err: 18.96% (1090/5750): 100%|████████████████████| 23/23 [00:18<00:00, 1.26it/s] + # Loss: 0.995 | Err: 18.957% (1090/5750): 100%|████████████████████| 23/23 [00:18<00:00, 1.26it/s] Train Your Own Model @@ -39,7 +34,7 @@ Train Your Own Model - Example training command for training above model:: - CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py --dataset minc --model deepten --nclass 23 --model deepten --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step + CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py --dataset minc --model deepten_resnet50_minc --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step --weight-decay 5e-4 - Detail training options:: @@ -62,20 +57,6 @@ Train Your Own Model --eval evaluating -Extending the Software ---------------------- - -This code is well written, easy to use and extendable for your own models or datasets: - -- Write your own Dataloader ``mydataset.py`` to ``dataset/`` folder - -- Write your own Model ``mymodel.py`` to ``model/`` folder - -- Run the program:: - - python main.py --dataset mydataset --model mymodel - - Citation -------- diff --git a/docs/source/functions.rst b/docs/source/functions.rst deleted file mode 100644 index e3f3c8c4..00000000 --- a/docs/source/functions.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. role:: hidden - :class: hidden-section - -encoding.functions ================== - -.. automodule:: encoding.functions - -.. currentmodule:: encoding.functions - - -:hidden:`batchnormtrain` ~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: batchnormtrain - -:hidden:`aggregate` ~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: aggregate - - -:hidden:`scaled_l2` ~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: scaled_l2 - - -:hidden:`sum_square` ~~~~~~~~~~~~~~~~~~~~ - -.. autofunction:: sum_square diff --git a/docs/source/index.rst b/docs/source/index.rst index 5302df1f..fb8a9567 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -30,8 +30,7 @@ An optimized PyTorch package with CUDA backend. nn parallel - dilated - functions + models utils Indices and tables
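For reference, the ``--pretrained --eval`` flow above has a Python-level counterpart. A minimal sketch, assuming ``encoding.models.get_model`` accepts the ``deepten_resnet50_minc`` name used by the CLI and fetches the weights on first use::

    import torch
    import encoding

    # Load the pre-trained DeepTen model by name (weights downloaded if absent).
    model = encoding.models.get_model('deepten_resnet50_minc', pretrained=True)
    model.eval()

    # A random 224x224 batch stands in for a MINC-2500 test crop.
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(x)   # shape (1, 23): one score per MINC-2500 class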
diff --git a/docs/source/dilated.rst b/docs/source/models.rst similarity index 91% rename from docs/source/dilated.rst rename to docs/source/models.rst index 5aef805f..0ec7a81f 100644 --- a/docs/source/dilated.rst +++ b/docs/source/models.rst @@ -1,9 +1,15 @@ .. role:: hidden :class: hidden-section -encoding.dilated +encoding.models ================ +.. automodule:: encoding.models.resnet +.. currentmodule:: encoding.models.resnet + +ResNet +------ + We provide correct dilated pre-trained ResNet and DenseNet (stride of 8) for semantic segmentation. For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`. All provided models have been verified. @@ -14,12 +20,6 @@ All provided models have been verified. * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* -.. automodule:: encoding.dilated -.. currentmodule:: encoding.dilated - -ResNet ------ - :hidden:`ResNet` ~~~~~~~~~~~~~~~~ diff --git a/docs/source/nn.rst b/docs/source/nn.rst index 927d7036..7310ac15 100644 --- a/docs/source/nn.rst +++ b/docs/source/nn.rst @@ -14,10 +14,10 @@ Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Norm .. autoclass:: Encoding :members: -:hidden:`BatchNorm2d` +:hidden:`SyncBatchNorm` ~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: BatchNorm2d +.. autoclass:: SyncBatchNorm :members: :hidden:`BatchNorm1d` @@ -26,6 +26,12 @@ Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Norm .. autoclass:: BatchNorm1d :members: +:hidden:`BatchNorm2d` +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: BatchNorm2d + :members: + :hidden:`BatchNorm3d` ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/notes/compile.rst b/docs/source/notes/compile.rst index 22b1ea20..40d570d8 100644 --- a/docs/source/notes/compile.rst +++ b/docs/source/notes/compile.rst @@ -2,13 +2,10 @@ Install and Citations ===================== -Install from Source ------------------- +Installation +------------ - * Install PyTorch by following the `PyTorch instructions `_. - This package relies on PyTorch master branch (higher than stable released v0.4.0), please follow - `the instruction `_ to install - PyTorch from source. + * Install PyTorch 1.0 by following the `PyTorch instructions <https://pytorch.org/>`_. * PIP Install:: diff --git a/encoding/__init__.py b/encoding/__init__.py index 5dc68d55..2c33ce83 100644 --- a/encoding/__init__.py +++ b/encoding/__init__.py @@ -10,4 +10,4 @@ """An optimized PyTorch package with CUDA backend.""" from .version import __version__ -from . import nn, functions, dilated, parallel, utils, models, datasets +from . import nn, functions, parallel, utils, models, datasets, transforms diff --git a/encoding/datasets/__init__.py b/encoding/datasets/__init__.py index cdab5d76..ed9be3cf 100644 --- a/encoding/datasets/__init__.py +++ b/encoding/datasets/__init__.py @@ -1,3 +1,5 @@ +import warnings +from torchvision.datasets import * from .base import * from .coco import COCOSegmentation from .ade20k import ADE20KSegmentation @@ -5,6 +7,10 @@ from .pascal_aug import VOCAugSegmentation from .pcontext import ContextSegmentation from .cityscapes import CitySegmentation +from .imagenet import ImageNetDataset +from .minc import MINCDataset + +from ..utils import EncodingDeprecationWarning datasets = { 'coco': COCOSegmentation, @@ -13,7 +19,40 @@ 'pascal_aug': VOCAugSegmentation, 'pcontext': ContextSegmentation, 'citys': CitySegmentation, + 'imagenet': ImageNetDataset, + 'minc': MINCDataset, + 'cifar10': CIFAR10, +} + +acronyms = { + 'coco': 'coco', + 'pascal_voc': 'voc', + 'pascal_aug': 'voc', + 'pcontext': 'pcontext', + 'ade20k': 'ade', + 'citys': 'citys', + 'minc': 'minc', + 'cifar10': 'cifar10', } -def get_segmentation_dataset(name, **kwargs): +def get_dataset(name, **kwargs): return datasets[name.lower()](**kwargs) + +def _make_deprecate(meth, old_name): + new_name = meth.__name__ + + def deprecated_init(*args, **kwargs): + warnings.warn("encoding.datasets.{} is now deprecated in favor of encoding.datasets.{}." + .format(old_name, new_name), EncodingDeprecationWarning) + return meth(*args, **kwargs) + + deprecated_init.__doc__ = r""" + {old_name}(...) + .. warning:: + This method is now deprecated in favor of :func:`encoding.datasets.{new_name}`. + See :func:`~encoding.datasets.{new_name}` for details.""".format( + old_name=old_name, new_name=new_name) + deprecated_init.__name__ = old_name + return deprecated_init + +get_segmentation_dataset = _make_deprecate(get_dataset, 'get_segmentation_dataset')
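The ``_make_deprecate`` shim above keeps old call sites working while steering users to the new name. A minimal usage sketch (it assumes the ADE20K data has already been prepared under ``~/.encoding/data``)::

    import warnings
    from encoding.datasets import get_dataset, get_segmentation_dataset

    # New-style call.
    trainset = get_dataset('ade20k', split='train')

    # Old-style call still works but emits EncodingDeprecationWarning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        trainset = get_segmentation_dataset('ade20k', split='train')
    assert any('deprecated' in str(w.message) for w in caught)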
diff --git a/encoding/datasets/ade20k.py b/encoding/datasets/ade20k.py index 4ad1f853..56b172d1 100644 --- a/encoding/datasets/ade20k.py +++ b/encoding/datasets/ade20k.py @@ -57,6 +57,39 @@ def __getitem__(self, index): mask = self.target_transform(mask) return img, mask + def _sync_transform(self, img, mask): + # random mirror + if random.random() < 0.5: + img = img.transpose(Image.FLIP_LEFT_RIGHT) + mask = mask.transpose(Image.FLIP_LEFT_RIGHT) + crop_size = self.crop_size + w, h = img.size + long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.5)) + if h > w: + oh = long_size + ow = int(1.0 * w * long_size / h + 0.5) + short_size = ow + else: + ow = long_size + oh = int(1.0 * h * long_size / w + 0.5) + short_size = oh + img = img.resize((ow, oh), Image.BILINEAR) + mask = mask.resize((ow, oh), Image.NEAREST) + # pad crop + if short_size < crop_size: + padh = crop_size - oh if oh < crop_size else 0 + padw = crop_size - ow if ow < crop_size else 0 + img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) + mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) + # random crop crop_size + w, h = img.size + x1 = random.randint(0, w - crop_size) + y1 = random.randint(0, h - crop_size) + img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) + mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) + # final transform + return img, self._mask_transform(mask) + def _mask_transform(self, mask): target = np.array(mask).astype('int64') - 1 return torch.from_numpy(target) diff --git a/encoding/datasets/base.py b/encoding/datasets/base.py index d2d476f9..52b38fd5 100644 --- a/encoding/datasets/base.py +++ b/encoding/datasets/base.py @@ -67,15 +67,16 @@ def _sync_transform(self, img, mask): img = img.transpose(Image.FLIP_LEFT_RIGHT) mask = mask.transpose(Image.FLIP_LEFT_RIGHT) crop_size = self.crop_size - # random scale (short edge from 480 to 720) - short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) w, h = img.size + long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) if h > w: - ow = short_size - oh = int(1.0 * h * ow / w) + oh = long_size + ow = int(1.0 * w * long_size / h + 0.5) + short_size = ow else: - oh = short_size - ow = int(1.0 * w * oh / h) + ow = long_size + oh = int(1.0 * h * long_size / w + 0.5) + short_size = oh img = img.resize((ow, oh), Image.BILINEAR) mask = mask.resize((ow, oh), Image.NEAREST) # pad crop @@ -90,10 +91,6 @@ def _sync_transform(self, img, mask): y1 = random.randint(0, h - crop_size) img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) - # gaussian blur as in PSP - if random.random() < 0.5: - img = img.filter(ImageFilter.GaussianBlur( - radius=random.random())) # final transform return img, self._mask_transform(mask)
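The rewritten ``_sync_transform`` jitters the long edge instead of the short one, then pads before the random crop. A self-contained sketch of just the sizing arithmetic (the ``base_size`` default here is an assumption; the datasets pass their own)::

    import random

    def jittered_size(w, h, base_size=520):
        # Draw the target LONG edge from [0.5x, 2.0x] of base_size,
        # then scale the other edge to preserve the aspect ratio.
        long_size = random.randint(int(base_size * 0.5), int(base_size * 2.0))
        if h > w:
            oh = long_size
            ow = int(1.0 * w * long_size / h + 0.5)
            short_size = ow
        else:
            ow = long_size
            oh = int(1.0 * h * long_size / w + 0.5)
            short_size = oh
        return ow, oh, short_size

    # e.g. (640, 480) with long_size drawn as 780 resizes to 780x585;
    # whenever short_size < crop_size the image is zero-padded before cropping.
    print(jittered_size(640, 480))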
diff --git a/encoding/datasets/cityscapes.py b/encoding/datasets/cityscapes.py index c5eeaaa2..8e3b2842 100644 --- a/encoding/datasets/cityscapes.py +++ b/encoding/datasets/cityscapes.py @@ -87,46 +87,6 @@ def __getitem__(self, index): mask = self.target_transform(mask) return img, mask - def _sync_transform(self, img, mask): - # random mirror - if random.random() < 0.5: - img = img.transpose(Image.FLIP_LEFT_RIGHT) - mask = mask.transpose(Image.FLIP_LEFT_RIGHT) - crop_size = self.crop_size - # random scale (short edge from 480 to 720) - short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) - w, h = img.size - if h > w: - ow = short_size - oh = int(1.0 * h * ow / w) - else: - oh = short_size - ow = int(1.0 * w * oh / h) - img = img.resize((ow, oh), Image.BILINEAR) - mask = mask.resize((ow, oh), Image.NEAREST) - # random rotate -10~10, mask using NN rotate - deg = random.uniform(-10, 10) - img = img.rotate(deg, resample=Image.BILINEAR) - mask = mask.rotate(deg, resample=Image.NEAREST) - # pad crop - if short_size < crop_size: - padh = crop_size - oh if oh < crop_size else 0 - padw = crop_size - ow if ow < crop_size else 0 - img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) - mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) - # random crop crop_size - w, h = img.size - x1 = random.randint(0, w - crop_size) - y1 = random.randint(0, h - crop_size) - img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) - mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) - # gaussian blur as in PSP - if random.random() < 0.5: - img = img.filter(ImageFilter.GaussianBlur( - radius=random.random())) - # final transform - return img, self._mask_transform(mask) - def _mask_transform(self, mask): #target = np.array(mask).astype('int32') - 1 target = self._class_to_index(np.array(mask).astype('int32')) diff --git a/encoding/datasets/coco.py b/encoding/datasets/coco.py index 6bd39194..0cce3564 100644 --- a/encoding/datasets/coco.py +++ b/encoding/datasets/coco.py @@ -23,6 +23,7 @@ def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', self.root = os.path.join(root, 'train2017') else: print('val set') + assert split == 'val' ann_file = os.path.join(root, 'annotations/instances_val2017.json') ids_file = os.path.join(root, 'annotations/val_ids.pth') self.root = os.path.join(root, 'val2017') @@ -99,6 +100,7 @@ def _preprocess(self, ids, ids_file): print('Found number of qualified images: ', len(new_ids)) torch.save(new_ids, ids_file) return new_ids + """ NUM_CHANNEL = 91 [] background @@ -123,4 +125,3 @@ def _preprocess(self, ids, ids_file): [7] train [72] tv """ - diff --git a/encoding/datasets/imagenet.py b/encoding/datasets/imagenet.py new file mode 100644 index 00000000..78b375f3 --- /dev/null +++ b/encoding/datasets/imagenet.py @@ -0,0 +1,21 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## Email: zhanghang0704@gmail.com +## Copyright (c) 2018 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import torchvision.transforms as transforms +import torchvision.datasets as datasets + +class ImageNetDataset(datasets.ImageFolder): + BASE_DIR = "ILSVRC2012" + def __init__(self, root=os.path.expanduser('~/.encoding/data'), transform=None, + target_transform=None, train=True, **kwargs): + split = 'train' if train else 'val' + root = os.path.join(root, self.BASE_DIR, split) + super(ImageNetDataset, self).__init__( + root, transform, target_transform) diff --git a/encoding/datasets/minc.py b/encoding/datasets/minc.py new file mode 100644 index 00000000..f64d1a4a --- /dev/null +++ b/encoding/datasets/minc.py @@ -0,0 +1,63 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## 
Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +from PIL import Image + +import torch +import torch.utils.data as data + +class MINCDataset(data.Dataset): + NUM_CLASS = 23 + def __init__(self, root=os.path.expanduser('~/.encoding/data/minc-2500/'), + split='train', transform=None): + self.transform = transform + classes, class_to_idx = find_classes(os.path.join(root, 'images')) + if split=='train': + filename = os.path.join(root, 'labels/train1.txt') + else: + filename = os.path.join(root, 'labels/test1.txt') + + self.images, self.labels = make_dataset(filename, root, + class_to_idx) + assert (len(self.images) == len(self.labels)) + + def __getitem__(self, index): + _img = Image.open(self.images[index]).convert('RGB') + _label = self.labels[index] + if self.transform is not None: + _img = self.transform(_img) + + return _img, _label + + def __len__(self): + return len(self.images) + +def find_classes(dir): + classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + +def make_dataset(filename, datadir, class_to_idx): + images = [] + labels = [] + with open(filename, "r") as lines: + for line in lines: + _image = os.path.join(datadir, line.rstrip('\n')) + _dirname = os.path.split(os.path.dirname(_image))[1] + assert os.path.isfile(_image) + label = class_to_idx[_dirname] + images.append(_image) + labels.append(label) + + return images, labels + diff --git a/encoding/dilated/__init__.py b/encoding/dilated/__init__.py deleted file mode 100644 index ed888108..00000000 --- a/encoding/dilated/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Dilated ResNet and DenseNet""" -from .resnet import *
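The new ``MINCDataset`` plugs straight into a standard ``DataLoader``. A minimal usage sketch (the transform choice here is an assumption, and MINC-2500 must already be unpacked under ``~/.encoding/data/minc-2500/``)::

    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader
    from encoding.datasets.minc import MINCDataset

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    # Any split other than 'train' reads labels/test1.txt.
    valset = MINCDataset(split='val', transform=transform)
    loader = DataLoader(valset, batch_size=32, shuffle=False)
    img, label = valset[0]   # tensor of shape (3, 224, 224), int label in [0, 22]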
diff --git a/encoding/functions/syncbn.py b/encoding/functions/syncbn.py index cf7dd615..e989f4a1 100644 --- a/encoding/functions/syncbn.py +++ b/encoding/functions/syncbn.py @@ -9,71 +9,291 @@ """Synchronized Cross-GPU Batch Normalization functions""" import torch +import torch.cuda.comm as comm from torch.autograd import Variable, Function +from torch.autograd.function import once_differentiable from .. import lib -__all__ = ['sum_square', 'batchnormtrain'] - -def sum_square(input): - r"""Calculate sum of elements and sum of squares for Batch Normalization""" - return _sum_square.apply(input) +__all__ = ['moments', 'syncbatchnorm', 'inp_syncbatchnorm'] +class moments(Function): + @staticmethod + def forward(ctx, x): + if x.is_cuda: + ex, ex2 = lib.gpu.expectation_forward(x) + else: + raise NotImplementedError + # save the input so backward can reference it + ctx.save_for_backward(x) + return ex, ex2 -class _sum_square(Function): @staticmethod - def forward(ctx, input): - ctx.save_for_backward(input) - if input.is_cuda: - xsum, xsqusum = lib.gpu.sumsquare_forward(input) + def backward(ctx, dex, dex2): + x, = ctx.saved_tensors + if x.is_cuda: + dx = lib.gpu.expectation_backward(x, dex, dex2) else: - xsum, xsqusum = lib.cpu.sumsquare_forward(input) - return xsum, xsqusum + raise NotImplementedError + return dx +class syncbatchnorm_(Function): + @classmethod + def forward(cls, ctx, x, gamma, beta, running_mean, running_var, + extra, sync=True, training=True, momentum=0.1, eps=1e-05, + activation="none", slope=0.01): + # save context + cls._parse_extra(ctx, extra) + ctx.sync = sync + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + assert activation == 'none' + + # contiguous inputs + x = x.contiguous() + gamma = gamma.contiguous() + beta = beta.contiguous() + + if ctx.training: + if x.is_cuda: + _ex, _exs = lib.gpu.expectation_forward(x) + else: + raise NotImplementedError + + if ctx.sync: + if ctx.is_master: + _ex, _exs = [_ex.unsqueeze(0)], [_exs.unsqueeze(0)] + for _ in range(ctx.master_queue.maxsize): + _ex_w, _exs_w = ctx.master_queue.get() + ctx.master_queue.task_done() + _ex.append(_ex_w.unsqueeze(0)) + _exs.append(_exs_w.unsqueeze(0)) + + _ex = comm.gather(_ex).mean(0) + _exs = comm.gather(_exs).mean(0) + + tensors = comm.broadcast_coalesced((_ex, _exs), [_ex.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((_ex, _exs)) + _ex, _exs = ctx.worker_queue.get() + ctx.worker_queue.task_done() + + # Update running stats + _var = _exs - _ex ** 2 + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var) + + # Mark in-place modified tensors + ctx.mark_dirty(running_mean, running_var) + else: + _ex, _var = running_mean.contiguous(), running_var.contiguous() + _exs = _var + _ex ** 2 + + # BN forward + activation + if x.is_cuda: + y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) + else: + y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) + + # Output + ctx.save_for_backward(x, _ex, _exs, gamma, beta) + return y
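    # Sanity check for the moment averaging used above (a comment-only
    # sketch; equal per-GPU batch sizes are assumed): averaging per-device
    # E[x] and E[x^2] and applying Var[x] = E[x^2] - E[x]^2 reproduces the
    # global (biased) batch variance:
    #
    #   import torch
    #   chunks = [torch.randn(8, 4) for _ in range(2)]   # two per-GPU batches
    #   ex = torch.stack([c.mean(0) for c in chunks]).mean(0)
    #   exs = torch.stack([(c * c).mean(0) for c in chunks]).mean(0)
    #   var = exs - ex ** 2
    #   assert torch.allclose(var, torch.cat(chunks).var(0, unbiased=False))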
 @staticmethod - def backward(ctx, gradSum, gradSquare): - input, = ctx.saved_variables - if input.is_cuda: - gradInput = lib.gpu.sumsquare_backward(input, gradSum, gradSquare) + @once_differentiable + def backward(ctx, dz): + x, _ex, _exs, gamma, beta = ctx.saved_tensors + dz = dz.contiguous() + + # BN backward + if dz.is_cuda: + dx, _dex, _dexs, dgamma, dbeta = \ + lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps) else: raise NotImplementedError - return gradInput + if ctx.training: + if ctx.sync: + if ctx.is_master: + _dex, _dexs = [_dex.unsqueeze(0)], [_dexs.unsqueeze(0)] + for _ in range(ctx.master_queue.maxsize): + _dex_w, _dexs_w = ctx.master_queue.get() + ctx.master_queue.task_done() + _dex.append(_dex_w.unsqueeze(0)) + _dexs.append(_dexs_w.unsqueeze(0)) + + _dex = comm.gather(_dex).mean(0) + _dexs = comm.gather(_dexs).mean(0) + + tensors = comm.broadcast_coalesced((_dex, _dexs), [_dex.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((_dex, _dexs)) + _dex, _dexs = ctx.worker_queue.get() + ctx.worker_queue.task_done() + + if x.is_cuda: + dx_ = lib.gpu.expectation_backward(x, _dex, _dexs) + else: + raise NotImplementedError + dx = dx + dx_ + + return dx, dgamma, dbeta, None, None, None, None, None, None, None, None, None -class _batchnormtrain(Function): @staticmethod - def forward(ctx, input, mean, std, gamma, beta): - ctx.save_for_backward(input, mean, std, gamma, beta) - if input.is_cuda: - output = lib.gpu.batchnorm_forward(input, mean, std, gamma, beta) + @staticmethod + def _parse_extra(ctx, extra): + ctx.is_master = extra["is_master"] + if ctx.is_master: + ctx.master_queue = extra["master_queue"] + ctx.worker_queues = extra["worker_queues"] + ctx.worker_ids = extra["worker_ids"] + else: + ctx.master_queue = extra["master_queue"] + ctx.worker_queue = extra["worker_queue"] +def _act_forward(ctx, x): + if ctx.activation.lower() == "leaky_relu": + if x.is_cuda: + lib.gpu.leaky_relu_forward(x, ctx.slope) + else: + raise NotImplementedError + else: + assert ctx.activation == 'none' +def _act_backward(ctx, x, dx): + if ctx.activation.lower() == "leaky_relu": + if x.is_cuda: + lib.gpu.leaky_relu_backward(x, dx, ctx.slope) else: - output = lib.cpu.batchnorm_forward(input, mean, std, gamma, beta) - return output + raise NotImplementedError + else: + assert ctx.activation == 'none' +class inp_syncbatchnorm_(Function): + @classmethod + def forward(cls, ctx, x, gamma, beta, running_mean, running_var, + extra, sync=True, training=True, momentum=0.1, eps=1e-05, + activation="none", slope=0.01): + # save context + cls._parse_extra(ctx, extra) + ctx.sync = sync + ctx.training = training + ctx.momentum = momentum + ctx.eps = eps + ctx.activation = activation + ctx.slope = slope + + # contiguous inputs + x = x.contiguous() + gamma = gamma.contiguous() + beta = beta.contiguous() + + if ctx.training: + if x.is_cuda: + _ex, _exs = lib.gpu.expectation_forward(x) + else: + raise NotImplementedError + + if ctx.sync: + if ctx.is_master: + _ex, _exs = [_ex.unsqueeze(0)], [_exs.unsqueeze(0)] + for _ in range(ctx.master_queue.maxsize): + _ex_w, _exs_w = ctx.master_queue.get() + ctx.master_queue.task_done() + _ex.append(_ex_w.unsqueeze(0)) + _exs.append(_exs_w.unsqueeze(0)) + + _ex = comm.gather(_ex).mean(0) + _exs = comm.gather(_exs).mean(0) + + tensors = comm.broadcast_coalesced((_ex, _exs), [_ex.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((_ex, _exs)) + _ex, _exs = ctx.worker_queue.get() + ctx.worker_queue.task_done() + + # Update running stats + _var = _exs - _ex ** 2 + running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex) + running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var) + + # Mark in-place modified tensors + ctx.mark_dirty(x, running_mean, running_var) + else: + _ex, _var = running_mean.contiguous(), running_var.contiguous() + _exs = _var + _ex ** 2 + ctx.mark_dirty(x) + + # BN forward + activation + if x.is_cuda: + lib.gpu.batchnorm_inp_forward(x, _ex, _exs, gamma, beta, ctx.eps) + else: + raise NotImplementedError + + _act_forward(ctx, x) + + # Output + ctx.save_for_backward(x, _ex, _exs, gamma, beta) + return x @staticmethod - def backward(ctx, gradOutput): - input, mean, std, gamma, beta = ctx.saved_variables - if gradOutput.is_cuda: - gradInput, gradMean, gradStd, 
gradGamma, gradBeta = \ - lib.gpu.batchnorm_backward(gradOutput, input, mean, - std, gamma, beta, True) + @once_differentiable + def backward(ctx, dz): + z, _ex, _exs, gamma, beta = ctx.saved_tensors + dz = dz.contiguous() + + # Undo activation + _act_backward(ctx, z, dz) + + # BN backward + if dz.is_cuda: + dx, _dex, _dexs, dgamma, dbeta = \ + lib.gpu.batchnorm_inp_backward(dz, z, _ex, _exs, gamma, beta, ctx.eps) else: raise NotImplementedError - return gradInput, gradMean, gradStd, gradGamma, gradBeta + if ctx.training: + if ctx.sync: + if ctx.is_master: + _dex, _dexs = [_dex.unsqueeze(0)], [_dexs.unsqueeze(0)] + for _ in range(ctx.master_queue.maxsize): + _dex_w, _dexs_w = ctx.master_queue.get() + ctx.master_queue.task_done() + _dex.append(_dex_w.unsqueeze(0)) + _dexs.append(_dexs_w.unsqueeze(0)) -def batchnormtrain(input, mean, std, gamma, beta): - r"""Applies Batch Normalization over a 3d input that is seen as a - mini-batch. + _dex = comm.gather(_dex).mean(0) + _dexs = comm.gather(_dexs).mean(0) - .. _encoding.batchnormtrain: + tensors = comm.broadcast_coalesced((_dex, _dexs), [_dex.get_device()] + ctx.worker_ids) + for ts, queue in zip(tensors[1:], ctx.worker_queues): + queue.put(ts) + else: + ctx.master_queue.put((_dex, _dexs)) + _dex, _dexs = ctx.worker_queue.get() + ctx.worker_queue.task_done() - .. math:: + if z.is_cuda: + lib.gpu.expectation_inp_backward(dx, z, _dex, _dexs, _ex, _exs, gamma, beta, ctx.eps) + else: + raise NotImplementedError - y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta + return dx, dgamma, dbeta, None, None, None, None, None, None, None, None, None - Shape: - - Input: :math:`(N, C)` or :math:`(N, C, L)` - - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) + @staticmethod + def _parse_extra(ctx, extra): + ctx.is_master = extra["is_master"] + if ctx.is_master: + ctx.master_queue = extra["master_queue"] + ctx.worker_queues = extra["worker_queues"] + ctx.worker_ids = extra["worker_ids"] + else: + ctx.master_queue = extra["master_queue"] + ctx.worker_queue = extra["worker_queue"] - """ - return _batchnormtrain.apply(input, mean, std, gamma, beta) +syncbatchnorm = syncbatchnorm_.apply +inp_syncbatchnorm = inp_syncbatchnorm_.apply diff --git a/encoding/lib/__init__.py b/encoding/lib/__init__.py index ff821e05..5675dfc6 100644 --- a/encoding/lib/__init__.py +++ b/encoding/lib/__init__.py @@ -17,9 +17,11 @@ if torch.cuda.is_available(): gpu = load('enclib_gpu', [ os.path.join(gpu_path, 'operator.cpp'), + os.path.join(gpu_path, 'activation_kernel.cu'), os.path.join(gpu_path, 'encoding_kernel.cu'), os.path.join(gpu_path, 'encodingv2_kernel.cu'), os.path.join(gpu_path, 'syncbn_kernel.cu'), os.path.join(gpu_path, 'roi_align_kernel.cu'), os.path.join(gpu_path, 'nms_kernel.cu'), - ], build_directory=gpu_path, verbose=False) + ], extra_cuda_cflags=["--expt-extended-lambda"], + build_directory=gpu_path, verbose=False) diff --git a/encoding/lib/cpu/__init__.py b/encoding/lib/cpu/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/encoding/lib/cpu/nms_cpu.cpp b/encoding/lib/cpu/nms_cpu.cpp index 82f1c7b9..d078f30e 100644 --- a/encoding/lib/cpu/nms_cpu.cpp +++ b/encoding/lib/cpu/nms_cpu.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/encoding/lib/cpu/roi_align.cpp b/encoding/lib/cpu/roi_align.cpp deleted file mode 100644 index bfbbafff..00000000 --- a/encoding/lib/cpu/roi_align.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -// CPU declarations - -at::Tensor ROIAlignForwardCPU( - const at::Tensor& input, - 
const at::Tensor& bottom_rois, - int64_t pooled_height, - int64_t pooled_width, - double spatial_scale, - int64_t sampling_ratio); - -at::Tensor ROIAlignBackwardCPU( - const at::Tensor& bottom_rois, - const at::Tensor& grad_output, // gradient of the output of the layer - int64_t b_size, - int64_t channels, - int64_t height, - int64_t width, - int64_t pooled_height, - int64_t pooled_width, - double spatial_scale, - int64_t sampling_ratio); - - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("roi_align_forward", &ROIAlignForwardCPU, "ROI Align forward (CPU)"); - m.def("roi_align_backward", &ROIAlignBackwardCPU, "ROI Align backward (CPU)"); -} diff --git a/encoding/lib/cpu/roi_align_cpu.cpp b/encoding/lib/cpu/roi_align_cpu.cpp index 4472bc59..52a4295b 100644 --- a/encoding/lib/cpu/roi_align_cpu.cpp +++ b/encoding/lib/cpu/roi_align_cpu.cpp @@ -1,4 +1,4 @@ -#include +#include #include //#include diff --git a/encoding/lib/cpu/syncbn_cpu.cpp b/encoding/lib/cpu/syncbn_cpu.cpp index 64cf5fbe..10e4dea2 100644 --- a/encoding/lib/cpu/syncbn_cpu.cpp +++ b/encoding/lib/cpu/syncbn_cpu.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/encoding/lib/gpu/__init__.py b/encoding/lib/gpu/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/encoding/lib/gpu/activation_kernel.cu b/encoding/lib/gpu/activation_kernel.cu new file mode 100644 index 00000000..d58118d7 --- /dev/null +++ b/encoding/lib/gpu/activation_kernel.cu @@ -0,0 +1,45 @@ +#include +#include +#include +// #include + +#include + +#include +#include + + +namespace { + +template +inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { + // Create thrust pointers + thrust::device_ptr th_z = thrust::device_pointer_cast(z); + thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); + + thrust::transform_if(th_dz, th_dz + count, th_z, th_dz, + [slope] __device__ (const T& dz) { return dz * slope; }, + [] __device__ (const T& z) { return z < 0; }); + thrust::transform_if(th_z, th_z + count, th_z, + [slope] __device__ (const T& z) { return z / slope; }, + [] __device__ (const T& z) { return z < 0; }); +} + +} + +void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) { + at::leaky_relu_(z, slope); +} + +void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) { + int64_t count = z.numel(); + + AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] { + leaky_relu_backward_impl(z.data(), dz.data(), slope, count); + })); + /* + // unstable after scaling + at::leaky_relu_(z, 1.0 / slope); + at::leaky_relu_backward(dz, z, slope); + */ +} diff --git a/encoding/lib/gpu/encoding_kernel.cu b/encoding/lib/gpu/encoding_kernel.cu index bd40e151..a3e91c55 100644 --- a/encoding/lib/gpu/encoding_kernel.cu +++ b/encoding/lib/gpu/encoding_kernel.cu @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/encoding/lib/gpu/encodingv2_kernel.cu b/encoding/lib/gpu/encodingv2_kernel.cu index 97330309..068c2bd5 100644 --- a/encoding/lib/gpu/encodingv2_kernel.cu +++ b/encoding/lib/gpu/encodingv2_kernel.cu @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/encoding/lib/gpu/nms_kernel.cu b/encoding/lib/gpu/nms_kernel.cu index 464d0a6e..9c350a7f 100644 --- a/encoding/lib/gpu/nms_kernel.cu +++ b/encoding/lib/gpu/nms_kernel.cu @@ -1,4 +1,4 @@ -#include +#include #include #include "ATen/NativeFunctions.h" #include diff --git a/encoding/lib/gpu/operator.cpp b/encoding/lib/gpu/operator.cpp index 3faae98d..5d21a16e 100644 --- 
a/encoding/lib/gpu/operator.cpp +++ b/encoding/lib/gpu/operator.cpp @@ -9,9 +9,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)"); m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)"); m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)"); + m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)"); m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)"); - m.def("sumsquare_forward", &Sum_Square_Forward_CUDA, "SumSqu forward (CUDA)"); - m.def("sumsquare_backward", &Sum_Square_Backward_CUDA, "SumSqu backward (CUDA)"); + m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)"); + m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)"); + m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)"); + m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA, + "Inplace Expectation backward (CUDA)"); m.def("encoding_dist_forward", &Encoding_Dist_Forward_CUDA, "EncDist forward (CUDA)"); m.def("encoding_dist_backward", &Encoding_Dist_Backward_CUDA, "Assign backward (CUDA)"); m.def("encoding_dist_inference_forward", &Encoding_Dist_Inference_Forward_CUDA, @@ -20,4 +24,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { "Assign Inference backward (CUDA)"); m.def("aggregatev2_forward", &AggregateV2_Forward_CUDA, "AggregateV2 forward (CUDA)"); m.def("aggregatev2_backward", &AggregateV2_Backward_CUDA, "AggregateV2 backward (CUDA)"); + m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Leaky ReLU forward (CUDA)"); + m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Leaky ReLU backward (CUDA)"); } diff --git a/encoding/lib/gpu/operator.h b/encoding/lib/gpu/operator.h index 67e2972f..64dbe1de 100644 --- a/encoding/lib/gpu/operator.h +++ b/encoding/lib/gpu/operator.h @@ -1,4 +1,4 @@ -#include +#include #include at::Tensor ROIAlign_Forward_CUDA( @@ -54,36 +54,65 @@ at::Tensor BatchNorm_Forward_CUDA( const at::Tensor mean_, const at::Tensor std_, const at::Tensor gamma_, - const at::Tensor beta_); + const at::Tensor beta_, + float eps); + +at::Tensor BatchNorm_Forward_Inp_CUDA( + const at::Tensor input_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps); std::vector BatchNorm_Backward_CUDA( const at::Tensor gradoutput_, const at::Tensor input_, - const at::Tensor mean_, - const at::Tensor std_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps); + +std::vector BatchNorm_Inp_Backward_CUDA( + const at::Tensor gradoutput_, + const at::Tensor output_, + const at::Tensor ex_, + const at::Tensor exs_, const at::Tensor gamma_, - const at::Tensor beta_, - bool train); + const at::Tensor beta_, + float eps); -std::vector Sum_Square_Forward_CUDA( +std::vector Expectation_Forward_CUDA( const at::Tensor input_); -at::Tensor Sum_Square_Backward_CUDA( +at::Tensor Expectation_Backward_CUDA( const at::Tensor input_, - const at::Tensor gradSum_, - const at::Tensor gradSquare_); + const at::Tensor gradEx_, + const at::Tensor gradExs_); + +at::Tensor Expectation_Inp_Backward_CUDA( + const at::Tensor gradInput_, + const at::Tensor output_, + const at::Tensor gradEx_, + const at::Tensor gradExs_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps); 
at::Tensor Encoding_Dist_Inference_Forward_CUDA( - const at::Tensor X_, - const at::Tensor C_, - const at::Tensor STD_); + const at::Tensor X_, + const at::Tensor C_, + const at::Tensor STD_); std::vector Encoding_Dist_Inference_Backward_CUDA( - const at::Tensor GKD_, - const at::Tensor KD_, - const at::Tensor X_, - const at::Tensor C_, - const at::Tensor STD_); + const at::Tensor GKD_, + const at::Tensor KD_, + const at::Tensor X_, + const at::Tensor C_, + const at::Tensor STD_); std::vector Encoding_Dist_Forward_CUDA( const at::Tensor X, @@ -91,12 +120,12 @@ std::vector Encoding_Dist_Forward_CUDA( double eps); std::vector Encoding_Dist_Backward_CUDA( - const at::Tensor GKD_, - const at::Tensor GSTD_, - const at::Tensor KD_, - const at::Tensor X_, - const at::Tensor C_, - const at::Tensor STD_); + const at::Tensor GKD_, + const at::Tensor GSTD_, + const at::Tensor KD_, + const at::Tensor X_, + const at::Tensor C_, + const at::Tensor STD_); at::Tensor AggregateV2_Forward_CUDA( const at::Tensor A_, @@ -111,3 +140,7 @@ std::vector AggregateV2_Backward_CUDA( const at::Tensor X_, const at::Tensor C_, const at::Tensor STD_); + +void LeakyRelu_Forward_CUDA(at::Tensor z, float slope); + +void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope); diff --git a/encoding/lib/gpu/roi_align_kernel.cu b/encoding/lib/gpu/roi_align_kernel.cu index c55ee841..3c033537 100644 --- a/encoding/lib/gpu/roi_align_kernel.cu +++ b/encoding/lib/gpu/roi_align_kernel.cu @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/encoding/lib/gpu/setup.py b/encoding/lib/gpu/setup.py index 924b9998..f0ac8169 100644 --- a/encoding/lib/gpu/setup.py +++ b/encoding/lib/gpu/setup.py @@ -6,6 +6,7 @@ ext_modules=[ CUDAExtension('enclib_gpu', [ 'operator.cpp', + 'activation_kernel.cu', 'encoding_kernel.cu', 'encodingv2_kernel.cu', 'syncbn_kernel.cu', diff --git a/encoding/lib/gpu/syncbn_kernel.cu b/encoding/lib/gpu/syncbn_kernel.cu index 930bb953..ed509869 100644 --- a/encoding/lib/gpu/syncbn_kernel.cu +++ b/encoding/lib/gpu/syncbn_kernel.cu @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -11,14 +11,14 @@ namespace { template struct GradOp { __device__ GradOp(Acctype m, const DeviceTensor3 i, const DeviceTensor3 g) - : mean(m), input(i), gradOutput(g) {} + : beta(m), output(i), gradOutput(g) {} __device__ __forceinline__ Float2 operator()(int batch, int plane, int n) { DType g = gradOutput[batch][plane][n]; - DType c = ScalarConvert::to(input[batch][plane][n] - mean); + DType c = ScalarConvert::to(output[batch][plane][n] - beta); return Float2(g, g * c); } - const Acctype mean; - const DeviceTensor3 input; + const Acctype beta; + const DeviceTensor3 output; const DeviceTensor3 gradOutput; }; @@ -88,6 +88,72 @@ __global__ void BatchNorm_Forward_kernel ( } } +template +__global__ void BatchNorm_Forward_Inp_kernel ( + DeviceTensor input, + DeviceTensor mean, + DeviceTensor std, + DeviceTensor gamma, + DeviceTensor beta) { + int c = blockIdx.x; + /* main operation */ + for (int b = 0; b < input.getSize(0); ++b) { + for (int x = threadIdx.x; x < input.getSize(2); x += blockDim.x) { + DType inp = input[b][c][x]; + input[b][c][x] = gamma[c] * (inp - mean[c]) / + std[c] + beta[c]; + } + } +} + +template +__global__ void BatchNorm_Backward_Inp_kernel ( + DeviceTensor gradoutput, + DeviceTensor output, + DeviceTensor gradinput, + DeviceTensor gradgamma, + DeviceTensor gradbeta, + DeviceTensor mean, + DeviceTensor std, + DeviceTensor gamma, + DeviceTensor beta, + DeviceTensor gradEx, + DeviceTensor 
gradExs) { + /* declarations of the variables */ + /* Get the index and channels */ + int c = blockIdx.x; + /* main operation */ + GradOp> g(beta[c], output, gradoutput); + Float2 res = reduce, + GradOp>, + DeviceTensor>(g, gradoutput, c); + DType gradOutputSum = res.v1; + DType dotP = res.v2; + DType invstd = DType(1.0) / std[c]; + DType gradScale = invstd * gamma[c]; + if (threadIdx.x == 0) { + gradEx[c] = - gradOutputSum * gradScale + mean[c] * invstd * invstd * dotP; + gradExs[c] = - 0.5 * invstd * invstd * dotP; + } + if (gradinput.numElements() > 0) { + for (int batch = 0; batch < gradoutput.getSize(0); ++batch) { + for (int x = threadIdx.x; x < gradoutput.getSize(2); x += blockDim.x) { + gradinput[batch][c][x] = gradoutput[batch][c][x] * gradScale; + } + } + } + if (gradgamma.numElements() > 0) { + if (threadIdx.x == 0) { + gradgamma[c] += dotP / gamma[c]; + } + } + if (gradbeta.numElements() > 0) { + if (threadIdx.x == 0) { + gradbeta[c] += gradOutputSum; + } + } +} + template __global__ void BatchNorm_Backward_kernel ( DeviceTensor gradoutput, @@ -99,9 +165,8 @@ __global__ void BatchNorm_Backward_kernel ( DeviceTensor std, DeviceTensor gamma, DeviceTensor beta, - DeviceTensor gradMean, - DeviceTensor gradStd, - bool train) { + DeviceTensor gradEx, + DeviceTensor gradExs) { /* declarations of the variables */ /* Get the index and channels */ int c = blockIdx.x; @@ -114,9 +179,9 @@ __global__ void BatchNorm_Backward_kernel ( DType dotP = res.v2; DType invstd = DType(1.0) / std[c]; DType gradScale = invstd * gamma[c]; - if (train && threadIdx.x == 0) { - gradMean[c] = - gradOutputSum * gamma[c] * invstd; - gradStd[c] = - dotP * gamma[c] * invstd * invstd; + if (threadIdx.x == 0) { + gradEx[c] = - gradOutputSum * gradScale + mean[c] * invstd * invstd * dotP * gradScale; + gradExs[c] = - 0.5 * invstd * invstd * dotP * gradScale; } if (gradinput.numElements() > 0) { for (int batch = 0; batch < gradoutput.getSize(0); ++batch) { @@ -139,10 +204,11 @@ __global__ void BatchNorm_Backward_kernel ( template -__global__ void Sum_Square_Forward_kernel ( +__global__ void Expectation_Forward_kernel ( DeviceTensor input, - DeviceTensor sum, - DeviceTensor square) { + DeviceTensor ex, + DeviceTensor exs, + DType norm) { int c = blockIdx.x; /* main operation */ SumOp g(input); @@ -151,37 +217,60 @@ __global__ void Sum_Square_Forward_kernel ( DType xsum = res.v1; DType xsquare = res.v2; if (threadIdx.x == 0) { - sum[c] = xsum; - square[c] = xsquare; + ex[c] = xsum * norm; + exs[c] = xsquare * norm; } } template -__global__ void Sum_Square_Backward_kernel ( +__global__ void Expectation_Backward_kernel ( DeviceTensor gradInput, DeviceTensor input, - DeviceTensor gradSum, - DeviceTensor gradSquare) { + DeviceTensor gradEx, + DeviceTensor gradExs, + DType norm) { + int c = blockIdx.x; + /* main operation */ + for (int batch = 0; batch < gradInput.getSize(0); ++batch) { + for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) { + gradInput[batch][c][x] = gradEx[c] * norm + 2 * gradExs[c] * + input[batch][c][x] * norm; + } + } +} + +template +__global__ void Expectation_Backward_Inp_kernel ( + DeviceTensor gradInput, + DeviceTensor output, + DeviceTensor gradEx, + DeviceTensor gradExs, + DeviceTensor mean, + DeviceTensor std, + DeviceTensor gamma, + DeviceTensor beta, + DType norm) { int c = blockIdx.x; /* main operation */ for (int batch = 0; batch < gradInput.getSize(0); ++batch) { - for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) - { - gradInput[batch][c][x] = 
gradSum[c] + 2 * gradSquare[c] * - input[batch][c][x]; + for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) { + gradInput[batch][c][x] += gradEx[c] * norm + 2 * gradExs[c] * + ((output[batch][c][x] - beta[c]) / gamma[c] * std[c] + mean[c]) * norm; } - } + } } -} // namespcae +} // namespace at::Tensor BatchNorm_Forward_CUDA( const at::Tensor input_, - const at::Tensor mean_, - const at::Tensor std_, + const at::Tensor ex_, + const at::Tensor exs_, const at::Tensor gamma_, - const at::Tensor beta_) { + const at::Tensor beta_, + float eps) { auto output_ = at::zeros_like(input_); + auto std_ = (exs_ - ex_ * ex_ + eps).sqrt(); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(input_.size(1)); dim3 threads(getNumThreads(input_.size(2))); @@ -189,85 +278,157 @@ at::Tensor BatchNorm_Forward_CUDA( /* Device tensors */ DeviceTensor output = devicetensor(output_); DeviceTensor input = devicetensor(input_); - DeviceTensor mean = devicetensor(mean_); + DeviceTensor ex = devicetensor(ex_); DeviceTensor std = devicetensor(std_); DeviceTensor gamma = devicetensor(gamma_); DeviceTensor beta = devicetensor(beta_); /* kernel function */ BatchNorm_Forward_kernel<<>>( - output, input, mean, std, gamma, beta); + output, input, ex, std, gamma, beta); })); AT_ASSERT(cudaGetLastError() == cudaSuccess); return output_; } +at::Tensor BatchNorm_Forward_Inp_CUDA( + const at::Tensor input_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps) { + auto std_ = (exs_ - ex_ * ex_ + eps).sqrt(); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + dim3 blocks(input_.size(1)); + dim3 threads(getNumThreads(input_.size(2))); + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Forward_CUDA", ([&] { + /* Device tensors */ + DeviceTensor input = devicetensor(input_); + DeviceTensor ex = devicetensor(ex_); + DeviceTensor std = devicetensor(std_); + DeviceTensor gamma = devicetensor(gamma_); + DeviceTensor beta = devicetensor(beta_); + /* kernel function */ + BatchNorm_Forward_Inp_kernel<<>>( + input, ex, std, gamma, beta); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return input_; +} + + +std::vector BatchNorm_Inp_Backward_CUDA( + const at::Tensor gradoutput_, + const at::Tensor output_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps) { + /* outputs*/ + auto std_ = (exs_ - ex_ * ex_ + eps).sqrt(); + auto gradinput_ = at::zeros_like(output_); + auto gradgamma_ = at::zeros_like(gamma_); + auto gradbeta_ = at::zeros_like(beta_); + auto gradEx_ = at::zeros_like(ex_); + auto gradExs_ = at::zeros_like(std_); + /* cuda utils*/ + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + dim3 blocks(output_.size(1)); + dim3 threads(getNumThreads(output_.size(2))); + AT_DISPATCH_FLOATING_TYPES(output_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] { + /* Device tensors */ + DeviceTensor gradoutput = devicetensor(gradoutput_); + DeviceTensor output = devicetensor(output_); + DeviceTensor gradinput = devicetensor(gradinput_); + DeviceTensor gradgamma = devicetensor(gradgamma_); + DeviceTensor gradbeta = devicetensor(gradbeta_); + DeviceTensor ex = devicetensor(ex_); + DeviceTensor std = devicetensor(std_); + DeviceTensor gamma = devicetensor(gamma_); + DeviceTensor beta = devicetensor(beta_); + DeviceTensor gradEx = devicetensor(gradEx_); + DeviceTensor gradExs = devicetensor(gradExs_); + /* kernel function */ + BatchNorm_Backward_Inp_kernel + <<>>( + 
gradoutput, output, gradinput, gradgamma, gradbeta, ex, std, + gamma, beta, gradEx, gradExs); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_}; +} + + std::vector BatchNorm_Backward_CUDA( const at::Tensor gradoutput_, const at::Tensor input_, - const at::Tensor mean_, - const at::Tensor std_, + const at::Tensor ex_, + const at::Tensor exs_, const at::Tensor gamma_, - const at::Tensor beta_, - bool train) { + const at::Tensor beta_, + float eps) { /* outputs*/ - at::Tensor gradinput_ = at::zeros_like(input_); - at::Tensor gradgamma_ = at::zeros_like(gamma_); - at::Tensor gradbeta_ = at::zeros_like(beta_); - at::Tensor gradMean_ = at::zeros_like(mean_); - at::Tensor gradStd_ = at::zeros_like(std_); + auto std_ = (exs_ - ex_ * ex_ + eps).sqrt(); + auto gradinput_ = at::zeros_like(input_); + auto gradgamma_ = at::zeros_like(gamma_); + auto gradbeta_ = at::zeros_like(beta_); + auto gradEx_ = at::zeros_like(ex_); + auto gradExs_ = at::zeros_like(std_); /* cuda utils*/ cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(input_.size(1)); dim3 threads(getNumThreads(input_.size(2))); - AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] { + AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] { /* Device tensors */ DeviceTensor gradoutput = devicetensor(gradoutput_); DeviceTensor input = devicetensor(input_); DeviceTensor gradinput = devicetensor(gradinput_); DeviceTensor gradgamma = devicetensor(gradgamma_); DeviceTensor gradbeta = devicetensor(gradbeta_); - DeviceTensor mean = devicetensor(mean_); + DeviceTensor ex = devicetensor(ex_); DeviceTensor std = devicetensor(std_); DeviceTensor gamma = devicetensor(gamma_); DeviceTensor beta = devicetensor(beta_); - DeviceTensor gradMean = devicetensor(gradMean_); - DeviceTensor gradStd = devicetensor(gradStd_); + DeviceTensor gradEx = devicetensor(gradEx_); + DeviceTensor gradExs = devicetensor(gradExs_); /* kernel function */ BatchNorm_Backward_kernel <<>>( - gradoutput, input, gradinput, gradgamma, gradbeta, mean, std, - gamma, beta, gradMean, gradStd, train); + gradoutput, input, gradinput, gradgamma, gradbeta, ex, std, + gamma, beta, gradEx, gradExs); })); AT_ASSERT(cudaGetLastError() == cudaSuccess); - return {gradinput_, gradMean_, gradStd_, gradgamma_, gradbeta_}; + return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_}; } -std::vector Sum_Square_Forward_CUDA( +std::vector Expectation_Forward_CUDA( const at::Tensor input_) { /* outputs */ - at::Tensor sum_ = torch::zeros({input_.size(1)}, input_.options()); - at::Tensor square_ = torch::zeros({input_.size(1)}, input_.options()); + auto ex_ = torch::zeros({input_.size(1)}, input_.options()); + auto exs_ = torch::zeros({input_.size(1)}, input_.options()); /* cuda utils*/ cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 blocks(input_.size(1)); dim3 threads(getNumThreads(input_.size(2))); AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_forward_CUDA", ([&] { + scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2)); /* Device tensors */ DeviceTensor input = devicetensor(input_); - DeviceTensor sum = devicetensor(sum_); - DeviceTensor square = devicetensor(square_); + DeviceTensor ex = devicetensor(ex_); + DeviceTensor exs = devicetensor(exs_); /* kernel function */ - Sum_Square_Forward_kernel - <<>>(input, sum, square); + Expectation_Forward_kernel + <<>>(input, ex, exs, norm); })); AT_ASSERT(cudaGetLastError() == cudaSuccess); - return 
{sum_, square_}; + return {ex_, exs_}; } -at::Tensor Sum_Square_Backward_CUDA( +at::Tensor Expectation_Backward_CUDA( const at::Tensor input_, - const at::Tensor gradSum_, - const at::Tensor gradSquare_) { + const at::Tensor gradEx_, + const at::Tensor gradExs_) { /* outputs */ at::Tensor gradInput_ = at::zeros_like(input_); /* cuda utils*/ @@ -275,14 +436,52 @@ at::Tensor Sum_Square_Backward_CUDA( dim3 blocks(input_.size(1)); dim3 threads(getNumThreads(input_.size(2))); AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_Backward_CUDA", ([&] { + scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2)); /* Device tensors */ DeviceTensor gradInput = devicetensor(gradInput_); DeviceTensor input = devicetensor(input_); - DeviceTensor gradSum = devicetensor(gradSum_); - DeviceTensor gradSquare =devicetensor(gradSquare_); + DeviceTensor gradEx = devicetensor(gradEx_); + DeviceTensor gradExs =devicetensor(gradExs_); + /* kernel function */ + Expectation_Backward_kernel + <<>>(gradInput, input, gradEx, gradExs, norm); + })); + AT_ASSERT(cudaGetLastError() == cudaSuccess); + return gradInput_; +} + +at::Tensor Expectation_Inp_Backward_CUDA( + const at::Tensor gradInput_, + const at::Tensor output_, + const at::Tensor gradEx_, + const at::Tensor gradExs_, + const at::Tensor ex_, + const at::Tensor exs_, + const at::Tensor gamma_, + const at::Tensor beta_, + float eps) { + /* outputs */ + //auto gradInput_ = at::zeros_like(output_); + auto std_ = (exs_ - ex_ * ex_ + eps).sqrt(); + /* cuda utils*/ + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + dim3 blocks(output_.size(1)); + dim3 threads(getNumThreads(output_.size(2))); + AT_DISPATCH_FLOATING_TYPES(output_.type(), "SumSquare_Backward_CUDA", ([&] { + scalar_t norm = scalar_t(1) / (output_.size(0) * output_.size(2)); + /* Device tensors */ + DeviceTensor gradInput = devicetensor(gradInput_); + DeviceTensor input = devicetensor(output_); + DeviceTensor gradEx = devicetensor(gradEx_); + DeviceTensor gradExs =devicetensor(gradExs_); + DeviceTensor ex = devicetensor(ex_); + DeviceTensor std = devicetensor(std_); + DeviceTensor gamma = devicetensor(gamma_); + DeviceTensor beta = devicetensor(beta_); /* kernel function */ - Sum_Square_Backward_kernel - <<>>(gradInput, input, gradSum, gradSquare); + Expectation_Backward_Inp_kernel + <<>>(gradInput, input, gradEx, gradExs, + ex, std, gamma, beta, norm); })); AT_ASSERT(cudaGetLastError() == cudaSuccess); return gradInput_; diff --git a/encoding/models/__init__.py b/encoding/models/__init__.py index bc101c1d..0211e48c 100644 --- a/encoding/models/__init__.py +++ b/encoding/models/__init__.py @@ -1,15 +1,21 @@ from .model_zoo import get_model from .model_store import get_model_file +from .resnet import * +from .cifarresnet import * from .base import * from .fcn import * from .psp import * from .encnet import * +from .deeplab import * def get_segmentation_model(name, **kwargs): from .fcn import get_fcn models = { 'fcn': get_fcn, 'psp': get_psp, + 'atten': get_atten, 'encnet': get_encnet, + 'encnetv2': get_encnetv2, + 'deeplab': get_deeplab, } return models[name.lower()](**kwargs) diff --git a/encoding/models/base.py b/encoding/models/base.py index 1b54ecf8..e4e21022 100644 --- a/encoding/models/base.py +++ b/encoding/models/base.py @@ -10,12 +10,11 @@ import torch import torch.nn as nn import torch.nn.functional as F -from torch.nn.functional import upsample from torch.nn.parallel.data_parallel import DataParallel from torch.nn.parallel.parallel_apply import parallel_apply from 
torch.nn.parallel.scatter_gather import scatter -from .. import dilated as resnet +from . import resnet from ..utils import batch_pix_accuracy, batch_intersection_union up_kwargs = {'mode': 'bilinear', 'align_corners': True} @@ -35,6 +34,7 @@ def __init__(self, nclass, backbone, aux, se_loss, dilated=True, norm_layer=None self.base_size = base_size self.crop_size = crop_size # copying modules from pretrained models + self.backbone = backbone if backbone == 'resnet50': self.pretrained = resnet.resnet50(pretrained=True, dilated=dilated, norm_layer=norm_layer, root=root) @@ -50,14 +50,28 @@ def __init__(self, nclass, backbone, aux, se_loss, dilated=True, norm_layer=None self._up_kwargs = up_kwargs def base_forward(self, x): - x = self.pretrained.conv1(x) - x = self.pretrained.bn1(x) - x = self.pretrained.relu(x) - x = self.pretrained.maxpool(x) - c1 = self.pretrained.layer1(x) - c2 = self.pretrained.layer2(c1) - c3 = self.pretrained.layer3(c2) - c4 = self.pretrained.layer4(c3) + if self.backbone.startswith('wideresnet'): + x = self.pretrained.mod1(x) + x = self.pretrained.pool2(x) + x = self.pretrained.mod2(x) + x = self.pretrained.pool3(x) + x = self.pretrained.mod3(x) + x = self.pretrained.mod4(x) + x = self.pretrained.mod5(x) + c3 = x.clone() + x = self.pretrained.mod6(x) + x = self.pretrained.mod7(x) + x = self.pretrained.bn_out(x) + return None, None, c3, x + else: + x = self.pretrained.conv1(x) + x = self.pretrained.bn1(x) + x = self.pretrained.relu(x) + x = self.pretrained.maxpool(x) + c1 = self.pretrained.layer1(x) + c2 = self.pretrained.layer2(c1) + c3 = self.pretrained.layer3(c2) + c4 = self.pretrained.layer4(c3) return c1, c2, c3, c4 def evaluate(self, x, target=None): @@ -124,6 +138,17 @@ def forward(self, image): width = long_size height = int(1.0 * h * long_size / w + 0.5) short_size = height + """ + short_size = int(math.ceil(self.base_size * scale)) + if h > w: + width = short_size + height = int(1.0 * h * short_size / w) + long_size = height + else: + height = short_size + width = int(1.0 * w * short_size / h) + long_size = width + """ # resize image to current size cur_img = resize_image(image, height, width, **self.module._up_kwargs) if long_size <= crop_size: @@ -180,7 +205,7 @@ def module_inference(module, image, flip=True): return output.exp() def resize_image(img, h, w, **up_kwargs): - return F.upsample(img, (h, w), **up_kwargs) + return F.interpolate(img, (h, w), **up_kwargs) def pad_image(img, mean, std, crop_size): b,c,h,w = img.size() diff --git a/encoding/models/cifarresnet.py b/encoding/models/cifarresnet.py new file mode 100644 index 00000000..f61993ab --- /dev/null +++ b/encoding/models/cifarresnet.py @@ -0,0 +1,140 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch +import torch.nn as nn +from torch.autograd import Variable +from ..nn import View +from .model_store import get_model_file + +__all__ = ['cifar_resnet20'] + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + +class Basicblock(nn.Module): + """ Pre-activation residual block + Identity Mapping in Deep Residual Networks + ref https://arxiv.org/abs/1603.05027 + """ + expansion
= 1 + def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d): + super(Basicblock, self).__init__() + if inplanes != planes or stride !=1 : + self.downsample = True + self.residual_layer = nn.Conv2d(inplanes, planes, + kernel_size=1, stride=stride) + else: + self.downsample = False + conv_block=[] + conv_block+=[norm_layer(inplanes), + nn.ReLU(inplace=True), + conv3x3(inplanes, planes,stride=stride), + norm_layer(planes), + nn.ReLU(inplace=True), + conv3x3(planes, planes)] + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, input): + if self.downsample: + residual = self.residual_layer(input) + else: + residual = input + return residual + self.conv_block(input) + + +class Bottleneck(nn.Module): + """ Pre-activation residual block + Identity Mapping in Deep Residual Networks + ref https://arxiv.org/abs/1603.05027 + """ + expansion = 4 + def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d): + super(Bottleneck, self).__init__() + if inplanes != planes*self.expansion or stride !=1 : + self.downsample = True + self.residual_layer = nn.Conv2d(inplanes, + planes * self.expansion, kernel_size=1, stride=stride) + else: + self.downsample = False + conv_block = [] + conv_block += [norm_layer(inplanes), + nn.ReLU(inplace=True), + nn.Conv2d(inplanes, planes, kernel_size=1, + stride=1, bias=False)] + conv_block += [norm_layer(planes), + nn.ReLU(inplace=True), + nn.Conv2d(planes, planes, kernel_size=3, + stride=stride, padding=1, bias=False)] + conv_block += [norm_layer(planes), + nn.ReLU(inplace=True), + nn.Conv2d(planes, planes * self.expansion, + kernel_size=1, stride=1, bias=False)] + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, x): + if self.downsample: + residual = self.residual_layer(x) + else: + residual = x + return residual + self.conv_block(x) + + +class CIFAR_ResNet(nn.Module): + def __init__(self, block=Basicblock, num_blocks=[2,2,2], width_factor = 1, + num_classes=10, norm_layer=torch.nn.BatchNorm2d): + super(CIFAR_ResNet, self).__init__() + self.expansion = block.expansion + + self.inplanes = int(width_factor * 16) + strides = [1, 2, 2] + model = [] + # Conv_1 + model += [nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1), + norm_layer(self.inplanes), + nn.ReLU(inplace=True)] + # Residual units + model += [self._residual_unit(block, self.inplanes, num_blocks[0], + strides[0], norm_layer=norm_layer)] + for i in range(2): + model += [self._residual_unit( + block, int(2*self.inplanes/self.expansion), + num_blocks[i+1], strides[i+1], norm_layer=norm_layer)] + # Last conv layer + model += [norm_layer(self.inplanes), + nn.ReLU(inplace=True), + nn.AvgPool2d(8), + View(-1, self.inplanes), + nn.Linear(self.inplanes, num_classes)] + self.model = nn.Sequential(*model) + + def _residual_unit(self, block, planes, n_blocks, stride, norm_layer): + strides = [stride] + [1]*(n_blocks-1) + layers = [] + for i in range(n_blocks): + layers += [block(self.inplanes, planes, strides[i], norm_layer=norm_layer)] + self.inplanes = self.expansion*planes + return nn.Sequential(*layers) + + def forward(self, input): + return self.model(input) + + +def cifar_resnet20(pretrained=False, root='~/.encoding/models', **kwargs): + """Constructs a CIFAR ResNet-18 model. 
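A minimal sketch of exercising the new CIFAR entry point (random inputs; ``pretrained=False`` so no checkpoint is fetched)::

    import torch
    from encoding.models import cifar_resnet20

    # Bottleneck blocks in three stages of three, on a 16-channel stem
    model = cifar_resnet20(pretrained=False)
    logits = model(torch.randn(2, 3, 32, 32))  # CIFAR-sized batch
    print(logits.shape)                        # torch.Size([2, 10])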
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = CIFAR_ResNet(Bottleneck, [3, 3, 3], **kwargs) + if pretrained: + model.load_state_dict(torch.load( + get_model_file('cifar_resnet20', root=root)), strict=False) + return model diff --git a/encoding/models/deeplab.py b/encoding/models/deeplab.py new file mode 100644 index 00000000..f921bbe7 --- /dev/null +++ b/encoding/models/deeplab.py @@ -0,0 +1,140 @@ +########################################################################### +# Created by: Hang Zhang +# Email: zhang.hang@rutgers.edu +# Copyright (c) 2017 +########################################################################### +from __future__ import division +import os +import numpy as np +import torch +import torch.nn as nn +from torch.nn.functional import interpolate + +from .base import BaseNet +from .fcn import FCNHead + +class DeepLabV3(BaseNet): + def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): + super(DeepLabV3, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) + self.head = DeepLabV3Head(2048, nclass, norm_layer, self._up_kwargs) + if aux: + self.auxlayer = FCNHead(1024, nclass, norm_layer) + + def forward(self, x): + _, _, h, w = x.size() + _, _, c3, c4 = self.base_forward(x) + + outputs = [] + x = self.head(c4) + x = interpolate(x, (h,w), **self._up_kwargs) + outputs.append(x) + if self.aux: + auxout = self.auxlayer(c3) + auxout = interpolate(auxout, (h,w), **self._up_kwargs) + outputs.append(auxout) + return tuple(outputs) + + +class DeepLabV3Head(nn.Module): + def __init__(self, in_channels, out_channels, norm_layer, up_kwargs, atrous_rates=[12, 24, 36], **kwargs): + super(DeepLabV3Head, self).__init__() + inter_channels = in_channels // 8 + self.aspp = ASPP_Module(in_channels, atrous_rates, norm_layer, up_kwargs, **kwargs) + self.block = nn.Sequential( + nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False), + norm_layer(inter_channels), + nn.ReLU(True), + nn.Dropout2d(0.1, False), + nn.Conv2d(inter_channels, out_channels, 1)) + + def forward(self, x): + x = self.aspp(x) + x = self.block(x) + return x + + +def ASPPConv(in_channels, out_channels, atrous_rate, norm_layer): + block = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, padding=atrous_rate, + dilation=atrous_rate, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + return block + +class AsppPooling(nn.Module): + def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): + super(AsppPooling, self).__init__() + self._up_kwargs = up_kwargs + self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + + def forward(self, x): + _, _, h, w = x.size() + pool = self.gap(x) + return interpolate(pool, (h,w), **self._up_kwargs) + +class ASPP_Module(nn.Module): + def __init__(self, in_channels, atrous_rates, norm_layer, up_kwargs): + super(ASPP_Module, self).__init__() + out_channels = in_channels // 8 + rate1, rate2, rate3 = tuple(atrous_rates) + self.b0 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + self.b1 = ASPPConv(in_channels, out_channels, rate1, norm_layer) + self.b2 = ASPPConv(in_channels, out_channels, rate2, norm_layer) + self.b3 = ASPPConv(in_channels, out_channels, rate3, norm_layer) + self.b4 = AsppPooling(in_channels, out_channels, norm_layer, up_kwargs) + + self.project = 
nn.Sequential( + nn.Conv2d(5*out_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True), + nn.Dropout2d(0.5, False)) + + def forward(self, x): + feat0 = self.b0(x) + feat1 = self.b1(x) + feat2 = self.b2(x) + feat3 = self.b3(x) + feat4 = self.b4(x) + y = torch.cat((feat0, feat1, feat2, feat3, feat4), 1) + return self.project(y) + +def get_deeplab(dataset='pascal_voc', backbone='resnet50', pretrained=False, + root='~/.encoding/models', **kwargs): + acronyms = { + 'pascal_voc': 'voc', + 'pascal_aug': 'voc', + 'ade20k': 'ade', + } + # infer number of classes + from ..datasets import datasets, VOCSegmentation, VOCAugSegmentation, ADE20KSegmentation + model = DeepLabV3(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) + if pretrained: + from .model_store import get_model_file + model.load_state_dict(torch.load( + get_model_file('deeplab_%s_%s'%(backbone, acronyms[dataset]), root=root))) + return model + +def get_deeplab_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): + r"""DeepLabV3 model from the paper `"Context Encoding for Semantic Segmentation" + `_ + + Parameters + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.encoding/models' + Location for keeping the model parameters. + + + Examples + -------- + >>> model = get_deeplab_resnet50_ade(pretrained=True) + >>> print(model) + """ + return get_deeplab('ade20k', 'resnet50', pretrained, root=root, **kwargs) diff --git a/encoding/models/deepten.py b/encoding/models/deepten.py new file mode 100644 index 00000000..50ef8281 --- /dev/null +++ b/encoding/models/deepten.py @@ -0,0 +1,97 @@ +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +## Created by: Hang Zhang +## ECE Department, Rutgers University +## Email: zhang.hang@rutgers.edu +## Copyright (c) 2017 +## +## This source code is licensed under the MIT-style license found in the +## LICENSE file in the root directory of this source tree +##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import torch +import torch.nn as nn + +from ..nn import Encoding, View, Normalize +from . 
import resnet + +__all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc'] + +class DeepTen(nn.Module): + def __init__(self, nclass, backbone): + super(DeepTen, self).__init__() + self.backbone = backbone + # copying modules from pretrained models + if self.backbone == 'resnet50': + self.pretrained = resnet.resnet50(pretrained=True, dilated=False) + elif self.backbone == 'resnet101': + self.pretrained = resnet.resnet101(pretrained=True, dilated=False) + elif self.backbone == 'resnet152': + self.pretrained = resnet.resnet152(pretrained=True, dilated=False) + else: + raise RuntimeError('unknown backbone: {}'.format(self.backbone)) + n_codes = 32 + self.head = nn.Sequential( + nn.Conv2d(2048, 128, 1), + nn.BatchNorm2d(128), + nn.ReLU(inplace=True), + Encoding(D=128,K=n_codes), + View(-1, 128*n_codes), + Normalize(), + nn.Linear(128*n_codes, nclass), + ) + + def forward(self, x): + _, _, h, w = x.size() + x = self.pretrained.conv1(x) + x = self.pretrained.bn1(x) + x = self.pretrained.relu(x) + x = self.pretrained.maxpool(x) + x = self.pretrained.layer1(x) + x = self.pretrained.layer2(x) + x = self.pretrained.layer3(x) + x = self.pretrained.layer4(x) + return self.head(x) + +def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False, + root='~/.encoding/models', **kwargs): + r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network" + `_ + Parameters + ---------- + dataset : str, default pascal_voc + The dataset that model pretrained on. (pascal_voc, ade20k) + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.encoding/models' + Location for keeping the model parameters. + Examples + -------- + >>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False) + >>> print(model) + """ + from ..datasets import datasets, acronyms + model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs) + if pretrained: + from .model_store import get_model_file + model.load_state_dict(torch.load( + get_model_file('deepten_%s_%s'%(backbone, acronyms[dataset]), root=root))) + return model + +def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs): + r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network" + `_ + Parameters + ---------- + pretrained : bool, default False + Whether to load the pretrained weights for model. + root : str, default '~/.encoding/models' + Location for keeping the model parameters. 
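Driving the renamed checkpoint through the model zoo (a sketch; ``pretrained=True`` downloads the ``deepten_resnet50_minc`` weights registered in ``model_store.py`` below)::

    import torch
    import encoding

    # head: 1x1 conv -> BN -> ReLU -> Encoding(D=128, K=32) -> View -> Normalize -> Linear
    model = encoding.models.get_model('deepten_resnet50_minc', pretrained=True)
    model.eval()
    with torch.no_grad():
        out = model(torch.randn(1, 3, 224, 224))  # MINC models use 224x224 crops
    print(out.shape)  # torch.Size([1, 23]); MINC-2500 has 23 classes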
+ + + Examples + -------- + >>> model = get_deepten_resnet50_minc(pretrained=True) + >>> print(model) + """ + return get_deepten(dataset='minc', backbone='resnet50', pretrained=pretrained, + root=root, **kwargs) diff --git a/encoding/models/encnet.py b/encoding/models/encnet.py index b69d2913..9d7e0207 100644 --- a/encoding/models/encnet.py +++ b/encoding/models/encnet.py @@ -9,9 +9,9 @@ import torch.nn as nn import torch.nn.functional as F -import encoding from .base import BaseNet from .fcn import FCNHead +from ..nn import SyncBatchNorm, Encoding, Mean __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext', 'get_encnet_resnet101_pcontext', 'get_encnet_resnet50_ade', @@ -19,7 +19,7 @@ class EncNet(BaseNet): def __init__(self, nclass, backbone, aux=True, se_loss=True, lateral=False, - norm_layer=nn.BatchNorm2d, **kwargs): + norm_layer=SyncBatchNorm, **kwargs): super(EncNet, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) self.head = EncHead(2048, self.nclass, se_loss=se_loss, @@ -33,10 +33,10 @@ def forward(self, x): features = self.base_forward(x) x = list(self.head(*features)) - x[0] = F.upsample(x[0], imsize, **self._up_kwargs) + x[0] = F.interpolate(x[0], imsize, **self._up_kwargs) if self.aux: auxout = self.auxlayer(features[2]) - auxout = F.upsample(auxout, imsize, **self._up_kwargs) + auxout = F.interpolate(auxout, imsize, **self._up_kwargs) x.append(auxout) return tuple(x) @@ -49,10 +49,10 @@ def __init__(self, in_channels, nclass, ncodes=32, se_loss=True, norm_layer=None nn.Conv2d(in_channels, in_channels, 1, bias=False), norm_layer(in_channels), nn.ReLU(inplace=True), - encoding.nn.Encoding(D=in_channels, K=ncodes), - encoding.nn.BatchNorm1d(ncodes), + Encoding(D=in_channels, K=ncodes), + norm_layer(ncodes), nn.ReLU(inplace=True), - encoding.nn.Mean(dim=1)) + Mean(dim=1)) self.fc = nn.Sequential( nn.Linear(in_channels, in_channels), nn.Sigmoid()) @@ -134,14 +134,9 @@ def get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False, >>> model = get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False) >>> print(model) """ - acronyms = { - 'pascal_voc': 'voc', - 'ade20k': 'ade', - 'pcontext': 'pcontext', - } kwargs['lateral'] = True if dataset.lower().startswith('p') else False # infer number of classes - from ..datasets import datasets + from ..datasets import datasets, acronyms model = EncNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) if pretrained: from .model_store import get_model_file diff --git a/encoding/models/fcn.py b/encoding/models/fcn.py index 47f4c129..6c030b87 100644 --- a/encoding/models/fcn.py +++ b/encoding/models/fcn.py @@ -8,7 +8,8 @@ import numpy as np import torch import torch.nn as nn -from torch.nn.functional import upsample +from torch.nn.functional import interpolate +from ..nn import ConcurrentModule, SyncBatchNorm from .base import BaseNet @@ -38,9 +39,10 @@ class FCN(BaseNet): >>> model = FCN(nclass=21, backbone='resnet50') >>> print(model) """ - def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): + def __init__(self, nclass, backbone, aux=True, se_loss=False, with_global=False, + norm_layer=SyncBatchNorm, **kwargs): super(FCN, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) - self.head = FCNHead(2048, nclass, norm_layer) + self.head = FCNHead(2048, nclass, norm_layer, self._up_kwargs, with_global) if aux: self.auxlayer = FCNHead(1024, nclass, norm_layer) @@ 
-49,24 +51,59 @@ def forward(self, x): _, _, c3, c4 = self.base_forward(x) x = self.head(c4) - x = upsample(x, imsize, **self._up_kwargs) + x = interpolate(x, imsize, **self._up_kwargs) outputs = [x] if self.aux: auxout = self.auxlayer(c3) - auxout = upsample(auxout, imsize, **self._up_kwargs) + auxout = interpolate(auxout, imsize, **self._up_kwargs) outputs.append(auxout) return tuple(outputs) + +class Identity(nn.Module): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, x): + return x + +class GlobalPooling(nn.Module): + def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): + super(GlobalPooling, self).__init__() + self._up_kwargs = up_kwargs + self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, 1, bias=False), + norm_layer(out_channels), + nn.ReLU(True)) + + def forward(self, x): + _, _, h, w = x.size() + pool = self.gap(x) + return interpolate(pool, (h,w), **self._up_kwargs) + class FCNHead(nn.Module): - def __init__(self, in_channels, out_channels, norm_layer): + def __init__(self, in_channels, out_channels, norm_layer, up_kwargs={}, with_global=False): super(FCNHead, self).__init__() inter_channels = in_channels // 4 - self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), - norm_layer(inter_channels), - nn.ReLU(), - nn.Dropout2d(0.1, False), - nn.Conv2d(inter_channels, out_channels, 1)) + self._up_kwargs = up_kwargs + if with_global: + self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), + norm_layer(inter_channels), + nn.ReLU(), + ConcurrentModule([ + Identity(), + GlobalPooling(inter_channels, inter_channels, + norm_layer, self._up_kwargs), + ]), + nn.Dropout2d(0.1, False), + nn.Conv2d(2*inter_channels, out_channels, 1)) + else: + self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), + norm_layer(inter_channels), + nn.ReLU(), + nn.Dropout2d(0.1, False), + nn.Conv2d(inter_channels, out_channels, 1)) def forward(self, x): return self.conv5(x) @@ -89,14 +126,8 @@ def get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False, >>> model = get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False) >>> print(model) """ - acronyms = { - 'pascal_voc': 'voc', - 'pascal_aug': 'voc', - 'pcontext': 'pcontext', - 'ade20k': 'ade', - } # infer number of classes - from ..datasets import datasets, VOCSegmentation, VOCAugSegmentation, ADE20KSegmentation + from ..datasets import datasets, acronyms model = FCN(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) if pretrained: from .model_store import get_model_file diff --git a/encoding/models/model_store.py b/encoding/models/model_store.py index e9f80999..daa1b237 100644 --- a/encoding/models/model_store.py +++ b/encoding/models/model_store.py @@ -7,10 +7,12 @@ from ..utils import download, check_sha1 _model_sha1 = {name: checksum for checksum, name in [ - ('ebb6acbbd1d1c90b7f446ae59d30bf70c74febc1', 'resnet50'), + ('25c4b50959ef024fcc050213a06b614899f94b3d', 'resnet50'), ('2a57e44de9c853fa015b172309a1ee7e2d0e4e2a', 'resnet101'), ('0d43d698c66aceaa2bc0309f55efdd7ff4b143af', 'resnet152'), - ('2e22611a7f3992ebdee6726af169991bc26d7363', 'deepten_minc'), + ('da4785cfc837bf00ef95b52fb218feefe703011f', 'wideresnet38'), + ('b41562160173ee2e979b795c551d3c7143b1e5b5', 'wideresnet50'), + ('1225f149519c7a0113c43a056153c1bb15468ac0', 'deepten_resnet50_minc'), ('662e979de25a389f11c65e9f1df7e06c2c356381', 
'fcn_resnet50_ade'), ('eeed8e582f0fdccdba8579e7490570adc6d85c7c', 'fcn_resnet50_pcontext'), ('54f70c772505064e30efd1ddd3a14e1759faa363', 'psp_resnet50_ade'), diff --git a/encoding/models/model_zoo.py b/encoding/models/model_zoo.py index 135d410e..d660acc8 100644 --- a/encoding/models/model_zoo.py +++ b/encoding/models/model_zoo.py @@ -1,8 +1,11 @@ # pylint: disable=wildcard-import, unused-wildcard-import +from .resnet import * +from .cifarresnet import * from .fcn import * from .psp import * from .encnet import * +from .deepten import * __all__ = ['get_model'] @@ -25,6 +28,13 @@ def get_model(name, **kwargs): The model. """ models = { + 'resnet18': resnet18, + 'resnet34': resnet34, + 'resnet50': resnet50, + 'resnet101': resnet101, + 'resnet152': resnet152, + 'cifar_resnet20': cifar_resnet20, + 'deepten_resnet50_minc': get_deepten_resnet50_minc, 'fcn_resnet50_pcontext': get_fcn_resnet50_pcontext, 'encnet_resnet50_pcontext': get_encnet_resnet50_pcontext, 'encnet_resnet101_pcontext': get_encnet_resnet101_pcontext, @@ -35,6 +45,6 @@ def get_model(name, **kwargs): } name = name.lower() if name not in models: - raise ValueError('%s\n\t%s' % (str(e), '\n\t'.join(sorted(models.keys())))) + raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys())))) net = models[name](**kwargs) return net diff --git a/encoding/models/psp.py b/encoding/models/psp.py index 89047f64..a4a2620e 100644 --- a/encoding/models/psp.py +++ b/encoding/models/psp.py @@ -8,7 +8,7 @@ import numpy as np import torch import torch.nn as nn -from torch.nn.functional import upsample +from torch.nn.functional import interpolate from .base import BaseNet from .fcn import FCNHead @@ -27,11 +27,11 @@ def forward(self, x): outputs = [] x = self.head(c4) - x = upsample(x, (h,w), **self._up_kwargs) + x = interpolate(x, (h,w), **self._up_kwargs) outputs.append(x) if self.aux: auxout = self.auxlayer(c3) - auxout = upsample(auxout, (h,w), **self._up_kwargs) + auxout = interpolate(auxout, (h,w), **self._up_kwargs) outputs.append(auxout) return tuple(outputs) @@ -52,13 +52,8 @@ def forward(self, x): def get_psp(dataset='pascal_voc', backbone='resnet50', pretrained=False, root='~/.encoding/models', **kwargs): - acronyms = { - 'pascal_voc': 'voc', - 'pascal_aug': 'voc', - 'ade20k': 'ade', - } # infer number of classes - from ..datasets import datasets + from ..datasets import datasets, acronyms model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) if pretrained: from .model_store import get_model_file diff --git a/encoding/dilated/resnet.py b/encoding/models/resnet.py similarity index 87% rename from encoding/dilated/resnet.py rename to encoding/models/resnet.py index e2feaa83..0908a062 100644 --- a/encoding/dilated/resnet.py +++ b/encoding/models/resnet.py @@ -4,6 +4,9 @@ import torch.utils.model_zoo as model_zoo import torch.nn as nn +from ..nn import GlobalAvgPool2d +from ..models.model_store import get_model_file + __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'BasicBlock', 'Bottleneck'] @@ -132,7 +135,7 @@ class ResNet(nn.Module): - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions." 
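With ``dilated`` now defaulting to ``False``, classification keeps the ImageNet stride-32 layout and segmentation backbones opt in explicitly; a sketch of the variants (strides assume the deep-base stem in this file)::

    import torch
    from encoding.models import resnet50

    cls_net = resnet50(pretrained=False)                # stride 32, GlobalAvgPool2d head
    seg_net = resnet50(pretrained=False, dilated=True)  # layer3/4 dilated -> stride 8
    mg_net = resnet50(pretrained=False, dilated=True,
                      multi_grid=True)                  # layer4 dilation rates [4, 8, 16]

    # GlobalAvgPool2d makes the classifier input-size agnostic:
    logits = cls_net(torch.randn(1, 3, 320, 320))       # torch.Size([1, 1000])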
""" # pylint: disable=unused-variable - def __init__(self, block, layers, num_classes=1000, dilated=True, + def __init__(self, block, layers, num_classes=1000, dilated=False, multi_grid=False, deep_base=True, norm_layer=nn.BatchNorm2d): self.inplanes = 128 if deep_base else 64 super(ResNet, self).__init__() @@ -157,14 +160,19 @@ def __init__(self, block, layers, num_classes=1000, dilated=True, if dilated: self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2, norm_layer=norm_layer) - self.layer4 = self._make_layer(block, 512, layers[3], stride=1, - dilation=4, norm_layer=norm_layer) + if multi_grid: + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=4, norm_layer=norm_layer, + multi_grid=True) + else: + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, + dilation=4, norm_layer=norm_layer) else: self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer) - self.avgpool = nn.AvgPool2d(7, stride=1) + self.avgpool = GlobalAvgPool2d() self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): @@ -175,7 +183,7 @@ def __init__(self, block, layers, num_classes=1000, dilated=True, m.weight.data.fill_(1) m.bias.data.zero_() - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None): + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None, multi_grid=False): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( @@ -185,7 +193,11 @@ def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=No ) layers = [] - if dilation == 1 or dilation == 2: + multi_dilations = [4, 8, 16] + if multi_grid: + layers.append(block(self.inplanes, planes, stride, dilation=multi_dilations[0], + downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) + elif dilation == 1 or dilation == 2: layers.append(block(self.inplanes, planes, stride, dilation=1, downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) elif dilation == 4: @@ -196,8 +208,12 @@ def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=No self.inplanes = planes * block.expansion for i in range(1, blocks): - layers.append(block(self.inplanes, planes, dilation=dilation, previous_dilation=dilation, - norm_layer=norm_layer)) + if multi_grid: + layers.append(block(self.inplanes, planes, dilation=multi_dilations[i], + previous_dilation=dilation, norm_layer=norm_layer)) + else: + layers.append(block(self.inplanes, planes, dilation=dilation, previous_dilation=dilation, + norm_layer=norm_layer)) return nn.Sequential(*layers) @@ -251,7 +267,6 @@ def resnet50(pretrained=False, root='~/.encoding/models', **kwargs): """ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) if pretrained: - from ..models.model_store import get_model_file model.load_state_dict(torch.load( get_model_file('resnet50', root=root)), strict=False) return model @@ -265,7 +280,6 @@ def resnet101(pretrained=False, root='~/.encoding/models', **kwargs): """ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) if pretrained: - from ..models.model_store import get_model_file model.load_state_dict(torch.load( get_model_file('resnet101', root=root)), strict=False) return model @@ -279,7 +293,6 @@ def resnet152(pretrained=False, root='~/.encoding/models', **kwargs): """ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) if 
pretrained: - from ..models.model_store import get_model_file model.load_state_dict(torch.load( get_model_file('resnet152', root=root)), strict=False) return model diff --git a/encoding/nn/__init__.py b/encoding/nn/__init__.py index 5d9769e0..58d9e3ec 100644 --- a/encoding/nn/__init__.py +++ b/encoding/nn/__init__.py @@ -12,3 +12,4 @@ from .encoding import * from .syncbn import * from .customize import * +from .loss import * diff --git a/encoding/nn/comm.py b/encoding/nn/comm.py deleted file mode 100644 index b64bf6ba..00000000 --- a/encoding/nn/comm.py +++ /dev/null @@ -1,131 +0,0 @@ -# -*- coding: utf-8 -*- -# File : comm.py -# Author : Jiayuan Mao -# Email : maojiayuan@gmail.com -# Date : 27/01/2018 -# -# This file is part of Synchronized-BatchNorm-PyTorch. -# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -# Distributed under MIT License. - -import queue -import collections -import threading - -__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] - - -class FutureResult(object): - """A thread-safe future implementation. Used only as one-to-one pipe.""" - - def __init__(self): - self._result = None - self._lock = threading.Lock() - self._cond = threading.Condition(self._lock) - - def put(self, result): - with self._lock: - assert self._result is None, 'Previous result has\'t been fetched.' - self._result = result - self._cond.notify() - - def get(self): - with self._lock: - if self._result is None: - self._cond.wait() - - res = self._result - self._result = None - return res - - -_MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) -_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) - - -class SlavePipe(_SlavePipeBase): - """Pipe for master-slave communication.""" - - def run_slave(self, msg): - self.queue.put((self.identifier, msg)) - ret = self.result.get() - self.queue.put(True) - return ret - - -class SyncMaster(object): - """An abstract `SyncMaster` object. - - - During the replication, as the data parallel will trigger an callback of each module, all slave devices should - call `register(id)` and obtain an `SlavePipe` to communicate with the master. - - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, - and passed to a registered callback. - - After receiving the messages, the master device should gather the information and determine to message passed - back to each slave devices. - """ - - def __init__(self, master_callback): - """ - - Args: - master_callback: a callback to be invoked after having collected messages from slave devices. - """ - self._master_callback = master_callback - self._queue = queue.Queue() - self._registry = collections.OrderedDict() - self._activated = False - - def register_slave(self, identifier): - """ - Register an slave device. - - Args: - identifier: an identifier, usually is the device id. - - Returns: a `SlavePipe` object which can be used to communicate with the master device. - - """ - if self._activated: - assert self._queue.empty(), 'Queue is not clean before next initialization.' - self._activated = False - self._registry.clear() - future = FutureResult() - self._registry[identifier] = _MasterRegistry(future) - return SlavePipe(identifier, self._queue, future) - - def run_master(self, master_msg): - """ - Main entry for the master device in each forward pass. 
- The messages were first collected from each devices (including the master device), and then - an callback will be invoked to compute the message to be sent back to each devices - (including the master device). - - Args: - master_msg: the message that the master want to send to itself. This will be placed as the first - message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. - - Returns: the message to be sent back to the master device. - - """ - self._activated = True - - intermediates = [(0, master_msg)] - for i in range(self.nr_slaves): - intermediates.append(self._queue.get()) - - results = self._master_callback(intermediates) - assert results[0][0] == 0, 'The first result should belongs to the master.' - - for i, res in results: - if i == 0: - continue - self._registry[i].result.put(res) - - for i in range(self.nr_slaves): - assert self._queue.get() is True - - return results[0][1] - - @property - def nr_slaves(self): - return len(self._registry) diff --git a/encoding/nn/customize.py b/encoding/nn/customize.py index 5df9c638..5c3a94c3 100644 --- a/encoding/nn/customize.py +++ b/encoding/nn/customize.py @@ -10,17 +10,27 @@ """Encoding Custermized NN Module""" import torch -from torch.nn import Module, Sequential, Conv2d, ReLU, AdaptiveAvgPool2d, \ - NLLLoss, BCELoss, CrossEntropyLoss, AvgPool2d, MaxPool2d, Parameter +import torch.nn as nn from torch.nn import functional as F from torch.autograd import Variable torch_ver = torch.__version__[:3] -__all__ = ['GramMatrix', 'SegmentationLosses', 'View', 'Sum', 'Mean', - 'Normalize', 'PyramidPooling'] +__all__ = ['GlobalAvgPool2d', 'GramMatrix', + 'View', 'Sum', 'Mean', 'Normalize', 'ConcurrentModule', + 'PyramidPooling'] -class GramMatrix(Module): +class GlobalAvgPool2d(nn.Module): + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + return F.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1) + + + +class GramMatrix(nn.Module): r""" Gram Matrix for a 4D convolutional featuremaps as a mini-batch .. 
math:: @@ -33,60 +43,7 @@ def forward(self, y): gram = features.bmm(features_t) / (ch * h * w) return gram -def softmax_crossentropy(input, target, weight, size_average, ignore_index, reduce=True): - return F.nll_loss(F.log_softmax(input, 1), target, weight, - size_average, ignore_index, reduce) - -class SegmentationLosses(CrossEntropyLoss): - """2D Cross Entropy Loss with Auxilary Loss""" - def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, - aux=False, aux_weight=0.4, weight=None, - size_average=True, ignore_index=-1): - super(SegmentationLosses, self).__init__(weight, size_average, ignore_index) - self.se_loss = se_loss - self.aux = aux - self.nclass = nclass - self.se_weight = se_weight - self.aux_weight = aux_weight - self.bceloss = BCELoss(weight, size_average) - - def forward(self, *inputs): - if not self.se_loss and not self.aux: - return super(SegmentationLosses, self).forward(*inputs) - elif not self.se_loss: - pred1, pred2, target = tuple(inputs) - loss1 = super(SegmentationLosses, self).forward(pred1, target) - loss2 = super(SegmentationLosses, self).forward(pred2, target) - return loss1 + self.aux_weight * loss2 - elif not self.aux: - pred, se_pred, target = tuple(inputs) - se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) - loss1 = super(SegmentationLosses, self).forward(pred, target) - loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) - return loss1 + self.se_weight * loss2 - else: - pred1, se_pred, pred2, target = tuple(inputs) - se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) - loss1 = super(SegmentationLosses, self).forward(pred1, target) - loss2 = super(SegmentationLosses, self).forward(pred2, target) - loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) - return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 - - @staticmethod - def _get_batch_label_vector(target, nclass): - # target is a 3D Variable BxHxW, output is 2D BxnClass - batch = target.size(0) - tvect = Variable(torch.zeros(batch, nclass)) - for i in range(batch): - hist = torch.histc(target[i].cpu().data.float(), - bins=nclass, min=0, - max=nclass-1) - vect = hist>0 - tvect[i] = vect - return tvect - - -class View(Module): +class View(nn.Module): """Reshape the input into different size, an inplace operator, support SelfParallel mode. """ @@ -101,7 +58,7 @@ def forward(self, input): return input.view(self.size) -class Sum(Module): +class Sum(nn.Module): def __init__(self, dim, keep_dim=False): super(Sum, self).__init__() self.dim = dim @@ -111,7 +68,7 @@ def forward(self, input): return input.sum(self.dim, self.keep_dim) -class Mean(Module): +class Mean(nn.Module): def __init__(self, dim, keep_dim=False): super(Mean, self).__init__() self.dim = dim @@ -121,7 +78,7 @@ def forward(self, input): return input.mean(self.dim, self.keep_dim) -class Normalize(Module): +class Normalize(nn.Module): r"""Performs :math:`L_p` normalization of inputs over specified dimension. Does: @@ -148,39 +105,54 @@ def __init__(self, p=2, dim=1): def forward(self, x): return F.normalize(x, self.p, self.dim, eps=1e-8) +class ConcurrentModule(nn.ModuleList): + r"""Feed to a list of modules concurrently. + The outputs of the layers are concatenated at channel dimension. 
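Functionally a fan-out/concatenate container, this is how the ``with_global`` FCN head above attaches its global-pooling branch; a small sketch with arbitrary channel sizes::

    import torch
    import torch.nn as nn
    from encoding.nn import ConcurrentModule

    branches = ConcurrentModule([
        nn.Identity(),                    # pass-through (fcn.py ships its own Identity for older torch)
        nn.Conv2d(64, 64, 3, padding=1),  # any branch that preserves the spatial size
    ])
    y = branches(torch.randn(2, 64, 16, 16))
    print(y.shape)  # torch.Size([2, 128, 16, 16]) -- concatenated on dim 1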
+ + Args: + modules (iterable, optional): an iterable of modules to add + """ + def __init__(self, modules=None): + super(ConcurrentModule, self).__init__(modules) + + def forward(self, x): + outputs = [] + for layer in self: + outputs.append(layer(x)) + return torch.cat(outputs, 1) -class PyramidPooling(Module): +class PyramidPooling(nn.Module): """ Reference: Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* """ def __init__(self, in_channels, norm_layer, up_kwargs): super(PyramidPooling, self).__init__() - self.pool1 = AdaptiveAvgPool2d(1) - self.pool2 = AdaptiveAvgPool2d(2) - self.pool3 = AdaptiveAvgPool2d(3) - self.pool4 = AdaptiveAvgPool2d(6) + self.pool1 = nn.AdaptiveAvgPool2d(1) + self.pool2 = nn.AdaptiveAvgPool2d(2) + self.pool3 = nn.AdaptiveAvgPool2d(3) + self.pool4 = nn.AdaptiveAvgPool2d(6) out_channels = int(in_channels/4) - self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), + self.conv1 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), norm_layer(out_channels), - ReLU(True)) - self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), + nn.ReLU(True)) + self.conv2 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), norm_layer(out_channels), - ReLU(True)) - self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), + nn.ReLU(True)) + self.conv3 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), norm_layer(out_channels), - ReLU(True)) - self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), + nn.ReLU(True)) + self.conv4 = nn.Sequential(nn.Conv2d(in_channels, out_channels, 1, bias=False), norm_layer(out_channels), - ReLU(True)) - # bilinear upsample options + nn.ReLU(True)) + # bilinear interpolate options self._up_kwargs = up_kwargs def forward(self, x): _, _, h, w = x.size() - feat1 = F.upsample(self.conv1(self.pool1(x)), (h, w), **self._up_kwargs) - feat2 = F.upsample(self.conv2(self.pool2(x)), (h, w), **self._up_kwargs) - feat3 = F.upsample(self.conv3(self.pool3(x)), (h, w), **self._up_kwargs) - feat4 = F.upsample(self.conv4(self.pool4(x)), (h, w), **self._up_kwargs) + feat1 = F.interpolate(self.conv1(self.pool1(x)), (h, w), **self._up_kwargs) + feat2 = F.interpolate(self.conv2(self.pool2(x)), (h, w), **self._up_kwargs) + feat3 = F.interpolate(self.conv3(self.pool3(x)), (h, w), **self._up_kwargs) + feat4 = F.interpolate(self.conv4(self.pool4(x)), (h, w), **self._up_kwargs) return torch.cat((x, feat1, feat2, feat3, feat4), 1) diff --git a/encoding/nn/loss.py b/encoding/nn/loss.py new file mode 100644 index 00000000..a84f18f9 --- /dev/null +++ b/encoding/nn/loss.py @@ -0,0 +1,167 @@ +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.autograd import Variable +import numpy as np +__all__ = ['SegmentationLosses', 'OhemCrossEntropy2d', 'OHEMSegmentationLosses'] + +class SegmentationLosses(nn.CrossEntropyLoss): + """2D Cross Entropy Loss with Auxilary Loss""" + def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, + aux=False, aux_weight=0.4, weight=None, + ignore_index=-1): + super(SegmentationLosses, self).__init__(weight, None, ignore_index) + self.se_loss = se_loss + self.aux = aux + self.nclass = nclass + self.se_weight = se_weight + self.aux_weight = aux_weight + self.bceloss = nn.BCELoss(weight) + + def forward(self, *inputs): + if not self.se_loss and not self.aux: + return super(SegmentationLosses, self).forward(*inputs) + elif not self.se_loss: + pred1, pred2, target = tuple(inputs) + loss1 = 
super(SegmentationLosses, self).forward(pred1, target) + loss2 = super(SegmentationLosses, self).forward(pred2, target) + return loss1 + self.aux_weight * loss2 + elif not self.aux: + pred, se_pred, target = tuple(inputs) + se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) + loss1 = super(SegmentationLosses, self).forward(pred, target) + loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) + return loss1 + self.se_weight * loss2 + else: + pred1, se_pred, pred2, target = tuple(inputs) + se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) + loss1 = super(SegmentationLosses, self).forward(pred1, target) + loss2 = super(SegmentationLosses, self).forward(pred2, target) + loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) + return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 + + @staticmethod + def _get_batch_label_vector(target, nclass): + # target is a 3D Variable BxHxW, output is 2D BxnClass + batch = target.size(0) + tvect = Variable(torch.zeros(batch, nclass)) + for i in range(batch): + hist = torch.histc(target[i].cpu().data.float(), + bins=nclass, min=0, + max=nclass-1) + vect = hist>0 + tvect[i] = vect + return tvect + +# adapted from https://github.com/PkuRainBow/OCNet/blob/master/utils/loss.py +class OhemCrossEntropy2d(nn.Module): + def __init__(self, ignore_label=-1, thresh=0.7, min_kept=100000, use_weight=True): + super(OhemCrossEntropy2d, self).__init__() + self.ignore_label = ignore_label + self.thresh = float(thresh) + self.min_kept = int(min_kept) + if use_weight: + print("w/ class balance") + weight = torch.FloatTensor([0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, + 1.0865, 1.1529, 1.0507]) + self.criterion = torch.nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_label) + else: + print("w/o class balance") + self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_label) + + def forward(self, predict, target, weight=None): + """ + Args: + predict:(n, c, h, w) + target:(n, h, w) + weight (Tensor, optional): a manual rescaling weight given to each class. 
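To make the tuple-unpacking contract of ``SegmentationLosses`` above concrete, a sketch with both the auxiliary and SE branches enabled (random tensors stand in for network outputs)::

    import torch
    from encoding.nn import SegmentationLosses

    criterion = SegmentationLosses(se_loss=True, aux=True, nclass=21,
                                   se_weight=0.2, aux_weight=0.4)
    pred1 = torch.randn(2, 21, 60, 60)               # main head
    se_pred = torch.randn(2, 21)                     # image-level SE prediction
    pred2 = torch.randn(2, 21, 60, 60)               # auxiliary FCN head
    target = torch.randint(0, 21, (2, 60, 60))       # long tensor of class indices
    loss = criterion(pred1, se_pred, pred2, target)  # argument order matters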
+ If given, has to be a Tensor of size "nclasses" + """ + assert not target.requires_grad + assert predict.dim() == 4 + assert target.dim() == 3 + assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0)) + assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1)) + assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2)) + + n, c, h, w = predict.size() + input_label = target.data.cpu().numpy().ravel().astype(np.int32) + x = np.rollaxis(predict.data.cpu().numpy(), 1).reshape((c, -1)) + input_prob = np.exp(x - x.max(axis=0).reshape((1, -1))) + input_prob /= input_prob.sum(axis=0).reshape((1, -1)) + + valid_flag = input_label != self.ignore_label + valid_inds = np.where(valid_flag)[0] + label = input_label[valid_flag] + num_valid = valid_flag.sum() + if self.min_kept >= num_valid: + print('Labels: {}'.format(num_valid)) + elif num_valid > 0: + prob = input_prob[:,valid_flag] + pred = prob[label, np.arange(len(label), dtype=np.int32)] + threshold = self.thresh + if self.min_kept > 0: + index = pred.argsort() + threshold_index = index[ min(len(index), self.min_kept) - 1 ] + if pred[threshold_index] > self.thresh: + threshold = pred[threshold_index] + kept_flag = pred <= threshold + valid_inds = valid_inds[kept_flag] + + label = input_label[valid_inds].copy() + input_label.fill(self.ignore_label) + input_label[valid_inds] = label + valid_flag_new = input_label != self.ignore_label + # print(np.sum(valid_flag_new)) + target = Variable(torch.from_numpy(input_label.reshape(target.size())).long().cuda()) + + return self.criterion(predict, target) + +class OHEMSegmentationLosses(OhemCrossEntropy2d): + """2D Cross Entropy Loss with Auxiliary Loss""" + def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, + aux=False, aux_weight=0.4, weight=None, + ignore_index=-1): + super(OHEMSegmentationLosses, self).__init__(ignore_index) + self.se_loss = se_loss + self.aux = aux + self.nclass = nclass + self.se_weight = se_weight + self.aux_weight = aux_weight + self.bceloss = nn.BCELoss(weight) + + def forward(self, *inputs): + if not self.se_loss and not self.aux: + return super(OHEMSegmentationLosses, self).forward(*inputs) + elif not self.se_loss: + pred1, pred2, target = tuple(inputs) + loss1 = super(OHEMSegmentationLosses, self).forward(pred1, target) + loss2 = super(OHEMSegmentationLosses, self).forward(pred2, target) + return loss1 + self.aux_weight * loss2 + elif not self.aux: + pred, se_pred, target = tuple(inputs) + se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) + loss1 = super(OHEMSegmentationLosses, self).forward(pred, target) + loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) + return loss1 + self.se_weight * loss2 + else: + pred1, se_pred, pred2, target = tuple(inputs) + se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) + loss1 = super(OHEMSegmentationLosses, self).forward(pred1, target) + loss2 = super(OHEMSegmentationLosses, self).forward(pred2, target) + loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) + return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 + + @staticmethod + def _get_batch_label_vector(target, nclass): + # target is a 3D Variable BxHxW, output is 2D BxnClass + batch = target.size(0) + tvect = Variable(torch.zeros(batch, nclass)) + for i in range(batch): + hist = torch.histc(target[i].cpu().data.float(), + bins=nclass, min=0, + max=nclass-1) + vect = hist>0 + tvect[i]
= vect + return tvect diff --git a/encoding/nn/syncbn.py b/encoding/nn/syncbn.py index 7eb1db3a..be76b6d0 100644 --- a/encoding/nn/syncbn.py +++ b/encoding/nn/syncbn.py @@ -9,117 +9,23 @@ ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ """Synchronized Cross-GPU Batch Normalization Module""" -import collections -import threading +import warnings +try: + from queue import Queue +except ImportError: + from Queue import Queue + import torch -from torch.nn import Module, Sequential, Conv1d, Conv2d, ConvTranspose2d, \ - ReLU, Sigmoid, MaxPool2d, AvgPool2d, AdaptiveAvgPool2d, Dropout2d, Linear, \ - DataParallel from torch.nn.modules.batchnorm import _BatchNorm -from torch.nn.functional import batch_norm -from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast +from ..utils.misc import EncodingDeprecationWarning from ..functions import * -from ..parallel import allreduce -from .comm import SyncMaster - - -__all__ = ['BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'Module', 'Sequential', 'Conv1d', - 'Conv2d', 'ConvTranspose2d', 'ReLU', 'Sigmoid', 'MaxPool2d', 'AvgPool2d', - 'AdaptiveAvgPool2d', 'Dropout2d', 'Linear'] - -class _SyncBatchNorm(_BatchNorm): - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): - super(_SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) - - self._sync_master = SyncMaster(self._data_parallel_master) - self._parallel_id = None - self._slave_pipe = None - - def forward(self, input): - if not self.training: - return batch_norm( - input, self.running_mean, self.running_var, self.weight, self.bias, - self.training, self.momentum, self.eps) - - # Resize the input to (B, C, -1). - input_shape = input.size() - input = input.view(input_shape[0], self.num_features, -1) - - # sum(x) and sum(x^2) - N = input.size(0) * input.size(2) - xsum, xsqsum = sum_square(input) - - # all-reduce for global sum(x) and sum(x^2) - if self._parallel_id == 0: - mean, inv_std = self._sync_master.run_master(_ChildMessage(xsum, xsqsum, N)) - else: - mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(xsum, xsqsum, N)) - # forward - return batchnormtrain(input, mean, 1.0/inv_std, self.weight, self.bias).view(input_shape) - - def __data_parallel_replicate__(self, ctx, copy_id): - self._parallel_id = copy_id - - # parallel_id == 0 means master device. - if self._parallel_id == 0: - ctx.sync_master = self._sync_master - else: - self._slave_pipe = ctx.sync_master.register_slave(copy_id) - - def _data_parallel_master(self, intermediates): - """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" - - # Always using same "device order" makes the ReduceAdd operation faster. 
- # Thanks to:: Tete Xiao (http://tetexiao.com/) - intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) - - to_reduce = [i[1][:2] for i in intermediates] - to_reduce = [j for i in to_reduce for j in i] # flatten - target_gpus = [i[1].sum.get_device() for i in intermediates] - - sum_size = sum([i[1].sum_size for i in intermediates]) - sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) - mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) - broadcasted = Broadcast.apply(target_gpus, mean, inv_std) - outputs = [] - for i, rec in enumerate(intermediates): - outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) +__all__ = ['SyncBatchNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d'] - return outputs - def _compute_mean_std(self, sum_, ssum, size): - """Compute the mean and standard-deviation with sum and square-sum. This method - also maintains the moving average on the master device.""" - assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' - mean = sum_ / size - sumvar = ssum - sum_ * mean - unbias_var = sumvar / (size - 1) - bias_var = sumvar / size - - self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data - self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data - - return mean, (bias_var + self.eps) ** -0.5 - - -# API adapted from https://github.com/vacancy/Synchronized-BatchNorm-PyTorch -_ChildMessage = collections.namedtuple('Message', ['sum', 'ssum', 'sum_size']) -_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) - - -class BatchNorm1d(_SyncBatchNorm): - r"""Please see the docs in :class:`encoding.nn.BatchNorm2d`""" - def _check_input_dim(self, input): - if input.dim() != 2 and input.dim() != 3: - raise ValueError('expected 2D or 3D input (got {}D input)' - .format(input.dim())) - super(BatchNorm2d, self)._check_input_dim(input) - - -class BatchNorm2d(_SyncBatchNorm): +class SyncBatchNorm(_BatchNorm): r"""Cross-GPU Synchronized Batch normalization (SyncBN) Standard BN [1]_ implementation only normalize the data within each device (GPU). @@ -127,11 +33,6 @@ class BatchNorm2d(_SyncBatchNorm): We follow the sync-onece implmentation described in the paper [2]_ . Please see the design idea in the `notes <./notes/syncbn.html>`_. - .. note:: - We adapt the awesome python API from another `PyTorch SyncBN Implementation - `_ and provide - efficient CUDA backend. - .. math:: y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta @@ -155,8 +56,12 @@ class BatchNorm2d(_SyncBatchNorm): Default: 1e-5 momentum: the value used for the running_mean and running_var computation. Default: 0.1 - affine: a boolean value that when set to ``True``, gives the layer learnable - affine parameters. Default: ``True`` + sync: a boolean value that when set to ``True``, synchronize across + different gpus. Default: ``True`` + activation : str + Name of the activation functions, one of: `leaky_relu` or `none`. + slope : float + Negative slope for the `leaky_relu` activation. Shape: - Input: :math:`(N, C, H, W)` @@ -167,79 +72,89 @@ class BatchNorm2d(_SyncBatchNorm): .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." 
*CVPR 2018* Examples: - >>> m = BatchNorm2d(100) + >>> m = SyncBatchNorm(100) >>> net = torch.nn.DataParallel(m) - >>> encoding.parallel.patch_replication_callback(net) >>> output = net(input) """ - def _check_input_dim(self, input): - if input.dim() != 4: - raise ValueError('expected 4D input (got {}D input)' - .format(input.dim())) - super(BatchNorm2d, self)._check_input_dim(input) - - -class BatchNorm3d(_SyncBatchNorm): - r"""Please see the docs in :class:`encoding.nn.BatchNorm2d`""" - def _check_input_dim(self, input): - if input.dim() != 5: - raise ValueError('expected 5D input (got {}D input)' - .format(input.dim())) - super(BatchNorm3d, self)._check_input_dim(input) - - -class SharedTensor(object): - """Shared Tensor for cross GPU all reduce operation""" - def __init__(self, nGPUs): - self.mutex = threading.Lock() - self.all_tasks_done = threading.Condition(self.mutex) - self.nGPUs = nGPUs - self._clear() - - def _clear(self): - self.N = 0 - self.dict = {} - self.push_tasks = self.nGPUs - self.reduce_tasks = self.nGPUs - - def push(self, *inputs): - # push from device - with self.mutex: - if self.push_tasks == 0: - self._clear() - self.N += inputs[0] - igpu = inputs[1] - self.dict[igpu] = inputs[2:] - #idx = self.nGPUs - self.push_tasks - self.push_tasks -= 1 - with self.all_tasks_done: - if self.push_tasks == 0: - self.all_tasks_done.notify_all() - while self.push_tasks: - self.all_tasks_done.wait() - - def pull(self, igpu): - # pull from device - with self.mutex: - if igpu == 0: - assert(len(self.dict) == self.nGPUs) - # flatten the tensors - self.list = [t for i in range(len(self.dict)) for t in self.dict[i]] - self.outlist = allreduce(2, *self.list) - self.reduce_tasks -= 1 - else: - self.reduce_tasks -= 1 - with self.all_tasks_done: - if self.reduce_tasks == 0: - self.all_tasks_done.notify_all() - while self.reduce_tasks: - self.all_tasks_done.wait() - # all reduce done - return self.N, self.outlist[2*igpu], self.outlist[2*igpu+1] - - def __len__(self): - return self.nGPUs - - def __repr__(self): - return ('SharedTensor') + def __init__(self, num_features, eps=1e-5, momentum=0.1, sync=True, activation="none", slope=0.01, + inplace=True): + super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=True) + self.activation = activation + self.inplace = False if activation == 'none' else inplace + #self.inplace = inplace + self.slope = slope + self.devices = list(range(torch.cuda.device_count())) + self.sync = sync if len(self.devices) > 1 else False + # Initialize queues + self.worker_ids = self.devices[1:] + self.master_queue = Queue(len(self.worker_ids)) + self.worker_queues = [Queue(1) for _ in self.worker_ids] + # running_exs + #self.register_buffer('running_exs', torch.ones(num_features)) + + def forward(self, x): + # Resize the input to (B, C, -1). 
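+        # Flow, for orientation: every replica reshapes to (B, C, -1) and
+        # computes its per-GPU moments; the replica holding devices[0] acts as
+        # master, collecting worker moments via master_queue and answering
+        # through the matching worker_queues entry, so all replicas normalize
+        # with the same global E[x] / E[x^2] (the reduction itself happens
+        # inside the syncbatchnorm / inp_syncbatchnorm autograd functions).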
+ input_shape = x.size() + x = x.view(input_shape[0], self.num_features, -1) + if x.get_device() == self.devices[0]: + # Master mode + extra = { + "is_master": True, + "master_queue": self.master_queue, + "worker_queues": self.worker_queues, + "worker_ids": self.worker_ids + } + else: + # Worker mode + extra = { + "is_master": False, + "master_queue": self.master_queue, + "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] + } + if self.inplace: + return inp_syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var, + extra, self.sync, self.training, self.momentum, self.eps, + self.activation, self.slope).view(input_shape) + else: + return syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var, + extra, self.sync, self.training, self.momentum, self.eps, + self.activation, self.slope).view(input_shape) + + def extra_repr(self): + if self.activation == 'none': + return 'sync={}'.format(self.sync) + else: + return 'sync={}, act={}, slope={}, inplace={}'.format( + self.sync, self.activation, self.slope, self.inplace + ) + +class BatchNorm1d(SyncBatchNorm): + r""" + .. warning:: + BatchNorm1d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. + """ + def __init__(self, *args, **kwargs): + warnings.warn("encoding.nn.{} is now deprecated in favor of encoding.nn.{}." + .format('BatchNorm1d', SyncBatchNorm.__name__), EncodingDeprecationWarning) + super(BatchNorm1d, self).__init__(*args, **kwargs) + +class BatchNorm2d(SyncBatchNorm): + r""" + .. warning:: + BatchNorm2d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. + """ + def __init__(self, *args, **kwargs): + warnings.warn("encoding.nn.{} is now deprecated in favor of encoding.nn.{}." + .format('BatchNorm2d', SyncBatchNorm.__name__), EncodingDeprecationWarning) + super(BatchNorm2d, self).__init__(*args, **kwargs) + +class BatchNorm3d(SyncBatchNorm): + r""" + .. warning:: + BatchNorm3d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. + """ + def __init__(self, *args, **kwargs): + warnings.warn("encoding.nn.{} is now deprecated in favor of encoding.nn.{}." + .format('BatchNorm3d', SyncBatchNorm.__name__), EncodingDeprecationWarning) + super(BatchNorm3d, self).__init__(*args, **kwargs) diff --git a/encoding/parallel.py b/encoding/parallel.py index 21d51ec3..61a7b71d 100644 --- a/encoding/parallel.py +++ b/encoding/parallel.py @@ -51,7 +51,6 @@ def backward(ctx, *inputs): outputs = comm.broadcast_coalesced(results, ctx.target_gpus) return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) - class Reduce(Function): @staticmethod def forward(ctx, *inputs): @@ -98,7 +97,6 @@ def gather(self, outputs, output_device): def replicate(self, module, device_ids): modules = super(DataParallelModel, self).replicate(module, device_ids) - execute_replication_callbacks(modules) return modules @@ -133,7 +131,6 @@ def forward(self, inputs, *targets, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) return Reduce.apply(*outputs) / len(outputs) - #return self.gather(outputs, self.output_device).mean() def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): @@ -188,62 +185,3 @@ def _worker(i, module, input, target, kwargs, device=None): raise output outputs.append(output) return outputs - - -########################################################################### -# Adapted from Synchronized-BatchNorm-PyTorch. 
-# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
-#
-class CallbackContext(object):
-    pass
-
-
-def execute_replication_callbacks(modules):
-    """
-    Execute an replication callback `__data_parallel_replicate__` on each module created
-    by original replication.
-
-    The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
-
-    Note that, as all modules are isomorphism, we assign each sub-module with a context
-    (shared among multiple copies of this module on different devices).
-    Through this context, different copies can share some information.
-
-    We guarantee that the callback on the master copy (the first copy) will be called ahead
-    of calling the callback of any slave copies.
-    """
-    master_copy = modules[0]
-    nr_modules = len(list(master_copy.modules()))
-    ctxs = [CallbackContext() for _ in range(nr_modules)]
-
-    for i, module in enumerate(modules):
-        for j, m in enumerate(module.modules()):
-            if hasattr(m, '__data_parallel_replicate__'):
-                m.__data_parallel_replicate__(ctxs[j], i)
-
-
-def patch_replication_callback(data_parallel):
-    """
-    Monkey-patch an existing `DataParallel` object. Add the replication callback.
-    Useful when you have customized `DataParallel` implementation.
-
-    Examples:
-        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
-        > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
-        > patch_replication_callback(sync_bn)
-        # this is equivalent to
-        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
-        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
-    """
-
-    assert isinstance(data_parallel, DataParallel)
-
-    old_replicate = data_parallel.replicate
-
-    @functools.wraps(old_replicate)
-    def new_replicate(module, device_ids):
-        modules = old_replicate(module, device_ids)
-        execute_replication_callbacks(modules)
-        return modules
-
-    data_parallel.replicate = new_replicate
diff --git a/encoding/transforms/__init__.py b/encoding/transforms/__init__.py
new file mode 100644
index 00000000..77521cb9
--- /dev/null
+++ b/encoding/transforms/__init__.py
@@ -0,0 +1,95 @@
+import torch
+from torchvision.transforms import *
+
+def get_transform(dataset, large_test_crop=False):
+    normalize = Normalize(mean=[0.485, 0.456, 0.406],
+                          std=[0.229, 0.224, 0.225])
+    if dataset == 'imagenet':
+        transform_train = Compose([
+            Resize(256),
+            RandomResizedCrop(224),
+            RandomHorizontalFlip(),
+            ColorJitter(0.4, 0.4, 0.4),
+            ToTensor(),
+            Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
+            normalize,
+        ])
+        if large_test_crop:
+            transform_val = Compose([
+                Resize(366),
+                CenterCrop(320),
+                ToTensor(),
+                normalize,
+            ])
+        else:
+            transform_val = Compose([
+                Resize(256),
+                CenterCrop(224),
+                ToTensor(),
+                normalize,
+            ])
+    elif dataset == 'minc':
+        transform_train = Compose([
+            Resize(256),
+            RandomResizedCrop(224),
+            RandomHorizontalFlip(),
+            ColorJitter(0.4, 0.4, 0.4),
+            ToTensor(),
+            Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
+            normalize,
+        ])
+        transform_val = Compose([
+            Resize(256),
+            CenterCrop(224),
+            ToTensor(),
+            normalize,
+        ])
+    elif dataset == 'cifar10':
+        # all transform names come from `from torchvision.transforms import *`
+        # above; there is no `transforms` module alias in this file
+        transform_train = Compose([
+            RandomCrop(32, padding=4),
+            RandomHorizontalFlip(),
+            ToTensor(),
+            Normalize((0.4914, 0.4822, 0.4465),
+                      (0.2023, 0.1994, 0.2010)),
+        ])
+        transform_val = Compose([
+            ToTensor(),
+            Normalize((0.4914, 0.4822, 0.4465),
+                      (0.2023, 0.1994, 0.2010)),
+        ])
+    return transform_train, transform_val
+
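+# A usage sketch, mirroring the call sites in experiments/recognition/main.py:
+#
+#   transform_train, transform_val = get_transform('cifar10')
+#   trainset = encoding.datasets.get_dataset('cifar10', transform=transform_train,
+#                                            train=True, download=True)
+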
+_imagenet_pca = {
+    'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
+    'eigvec': torch.Tensor([
+        [-0.5675, 0.7192, 0.4009],
+        [-0.5808, -0.0045, -0.8140],
+        [-0.5836, -0.6948, 0.4203],
+    ])
+}
+
+class Lighting(object):
+    """Lighting noise (AlexNet-style PCA-based noise)"""
+
+    def __init__(self, alphastd, eigval, eigvec):
+        self.alphastd = alphastd
+        self.eigval = eigval
+        self.eigvec = eigvec
+
+    def __call__(self, img):
+        if self.alphastd == 0:
+            return img
+
+        alpha = img.new().resize_(3).normal_(0, self.alphastd)
+        rgb = self.eigvec.type_as(img).clone()\
+            .mul(alpha.view(1, 3).expand(3, 3))\
+            .mul(self.eigval.view(1, 3).expand(3, 3))\
+            .sum(1).squeeze()
+
+        return img.add(rgb.view(3, 1, 1).expand_as(img))
diff --git a/encoding/utils/__init__.py b/encoding/utils/__init__.py
index ac5b20af..251276d9 100644
--- a/encoding/utils/__init__.py
+++ b/encoding/utils/__init__.py
@@ -12,9 +12,10 @@
 from .lr_scheduler import LR_Scheduler
 from .metrics import SegmentationMetric, batch_intersection_union, batch_pix_accuracy
 from .pallete import get_mask_pallete
-from .train_helper import get_selabel_vector, EMA
+from .train_helper import *
 from .presets import load_image
 from .files import *
+from .misc import *
 
 __all__ = ['LR_Scheduler', 'batch_pix_accuracy', 'batch_intersection_union',
            'save_checkpoint', 'download', 'mkdir', 'check_sha1', 'load_image',
diff --git a/encoding/utils/misc.py b/encoding/utils/misc.py
new file mode 100644
index 00000000..5cb56063
--- /dev/null
+++ b/encoding/utils/misc.py
@@ -0,0 +1,8 @@
+import warnings
+
+__all__ = ['EncodingDeprecationWarning']
+
+class EncodingDeprecationWarning(DeprecationWarning):
+    pass
+
+warnings.simplefilter('once', EncodingDeprecationWarning)
diff --git a/encoding/utils/pallete.py b/encoding/utils/pallete.py
index 0d757969..bdf3a800 100644
--- a/encoding/utils/pallete.py
+++ b/encoding/utils/pallete.py
@@ -19,7 +19,7 @@ def get_mask_pallete(npimg, dataset='detail'):
     out_img = Image.fromarray(npimg.squeeze().astype('uint8'))
     if dataset == 'ade20k':
         out_img.putpalette(adepallete)
-    elif dataset == 'cityscapes':
+    elif dataset == 'citys':
         out_img.putpalette(citypallete)
     elif dataset in ('detail', 'pascal_voc', 'pascal_aug'):
         out_img.putpalette(vocpallete)
diff --git a/encoding/utils/presets.py b/encoding/utils/presets.py
index 5024aa6e..2200a86d 100644
--- a/encoding/utils/presets.py
+++ b/encoding/utils/presets.py
@@ -4,13 +4,13 @@
 import torch
 import torchvision.transforms as transform
 
-__all__ = ['load_image', 'subtract_imagenet_mean_batch']
+__all__ = ['load_image']
 
 input_transform = transform.Compose([
     transform.ToTensor(),
     transform.Normalize([.485, .456, .406], [.229, .224, .225])])
 
-def load_image(filename, size=None, scale=None, keep_asp=True):
+def load_image(filename, size=None, scale=None, keep_asp=True, transform=input_transform):
     """Load the image for demos"""
     img = Image.open(filename).convert('RGB')
     if size is not None:
@@ -22,5 +22,6 @@
     elif scale is not None:
         img = img.resize((int(img.size[0] / scale),
                           int(img.size[1] / scale)), Image.ANTIALIAS)
-    img = input_transform(img)
+    if transform:
+        img = transform(img)
     return img
diff --git a/encoding/utils/train_helper.py b/encoding/utils/train_helper.py
index 9ebc3b7e..168550b2 100644
--- a/encoding/utils/train_helper.py
+++ b/encoding/utils/train_helper.py
@@ -9,6 +9,12 @@
 ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 import torch
+import torch.nn as nn
+
+#from ..nn import
SyncBatchNorm +from torch.nn.modules.batchnorm import _BatchNorm + +__all__ = ['get_selabel_vector'] def get_selabel_vector(target, nclass): r"""Get SE-Loss Label in a batch diff --git a/experiments/recognition/dataset/minc.py b/experiments/recognition/dataset/minc.py index 69b64381..3ac24431 100644 --- a/experiments/recognition/dataset/minc.py +++ b/experiments/recognition/dataset/minc.py @@ -49,7 +49,7 @@ def make_dataset(filename, datadir, class_to_idx): return images, labels -class MINCDataloder(data.Dataset): +class MINCDataset(data.Dataset): def __init__(self, root, train=True, transform=None): self.transform = transform classes, class_to_idx = find_classes(root + '/images') @@ -94,9 +94,9 @@ def __init__(self, args): normalize, ]) - trainset = MINCDataloder(root=os.path.expanduser('~/.encoding/data/minc-2500/'), + trainset = MINCDataset(root=os.path.expanduser('~/.encoding/data/minc-2500/'), train=True, transform=transform_train) - testset = MINCDataloder(root=os.path.expanduser('~/.encoding/data/minc-2500/'), + testset = MINCDataset(root=os.path.expanduser('~/.encoding/data/minc-2500/'), train=False, transform=transform_test) kwargs = {'num_workers': 8, 'pin_memory': True} if args.cuda else {} @@ -133,7 +133,7 @@ def __call__(self, img): if __name__ == "__main__": - trainset = MINCDataloder(root=os.path.expanduser('~/data/minc-2500/'), train=True) - testset = MINCDataloder(root=os.path.expanduser('~/data/minc-2500/'), train=False) + trainset = MINCDataset(root=os.path.expanduser('~/.encoding/data/minc-2500/'), train=True) + testset = MINCDataset(root=os.path.expanduser('~/.encoding/data/minc-2500/'), train=False) print(len(trainset)) print(len(testset)) diff --git a/experiments/recognition/main.py b/experiments/recognition/main.py index 6a97281c..d5a9f9b3 100644 --- a/experiments/recognition/main.py +++ b/experiments/recognition/main.py @@ -9,47 +9,45 @@ ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ from __future__ import print_function - import os -import matplotlib.pyplot as plot -import importlib +from tqdm import tqdm import torch import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -from torch.autograd import Variable +import encoding from option import Options -from encoding.utils import * - -from tqdm import tqdm # global variable -best_pred = 100.0 -errlist_train = [] -errlist_val = [] +best_pred = 0.0 +acclist_train = [] +acclist_val = [] def main(): # init the args - global best_pred, errlist_train, errlist_val + global best_pred, acclist_train, acclist_val args = Options().parse() args.cuda = not args.no_cuda and torch.cuda.is_available() + print(args) torch.manual_seed(args.seed) - # plot - if args.plot: - print('=>Enabling matplotlib for display:') - plot.ion() - plot.show() if args.cuda: torch.cuda.manual_seed(args.seed) # init dataloader - dataset = importlib.import_module('dataset.'+args.dataset) - Dataloader = dataset.Dataloader - train_loader, test_loader = Dataloader(args).getloader() + transform_train, transform_val = encoding.transforms.get_transform(args.dataset) + trainset = encoding.datasets.get_dataset(args.dataset, root=os.path.expanduser('~/.encoding/data'), + transform=transform_train, train=True, download=True) + valset = encoding.datasets.get_dataset(args.dataset, root=os.path.expanduser('~/.encoding/data'), + transform=transform_val, train=False, download=True) + train_loader = torch.utils.data.DataLoader( + trainset, batch_size=args.batch_size, shuffle=True, + num_workers=args.workers, 
pin_memory=True) + + val_loader = torch.utils.data.DataLoader( + valset, batch_size=args.test_batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + # init the model - models = importlib.import_module('model.'+args.model) - model = models.Net(args) + model = encoding.models.get_model(args.model, pretrained=args.pretrained) print(model) # criterion and optimizer criterion = nn.CrossEntropyLoss() @@ -58,8 +56,9 @@ def main(): weight_decay=args.weight_decay) if args.cuda: model.cuda() + criterion.cuda() # Please use CUDA_VISIBLE_DEVICES to control the number of gpus - model = torch.nn.DataParallel(model) + model = nn.DataParallel(model) # check point if args.resume is not None: if os.path.isfile(args.resume): @@ -67,108 +66,116 @@ def main(): checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] +1 best_pred = checkpoint['best_pred'] - errlist_train = checkpoint['errlist_train'] - errlist_val = checkpoint['errlist_val'] - model.load_state_dict(checkpoint['state_dict']) + acclist_train = checkpoint['acclist_train'] + acclist_val = checkpoint['acclist_val'] + model.module.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: raise RuntimeError ("=> no resume checkpoint found at '{}'".\ format(args.resume)) - scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, - len(train_loader), args.lr_step) + scheduler = encoding.utils.LR_Scheduler(args.lr_scheduler, args.lr, args.epochs, + len(train_loader), args.lr_step) def train(epoch): model.train() - global best_pred, errlist_train - train_loss, correct, total = 0,0,0 + losses = AverageMeter() + top1 = AverageMeter() + global best_pred, acclist_train tbar = tqdm(train_loader, desc='\r') for batch_idx, (data, target) in enumerate(tbar): scheduler(optimizer, batch_idx, epoch, best_pred) if args.cuda: data, target = data.cuda(), target.cuda() - data, target = Variable(data), Variable(target) optimizer.zero_grad() output = model(data) loss = criterion(output, target) loss.backward() optimizer.step() - train_loss += loss.data.item() - pred = output.data.max(1)[1] - correct += pred.eq(target.data).cpu().sum().item() - total += target.size(0) - err = 100.0 - 100.0 * correct / total - tbar.set_description('\rLoss: %.3f | Err: %.3f%% (%d/%d)' % \ - (train_loss/(batch_idx+1), err, total-correct, total)) + acc1 = accuracy(output, target, topk=(1,)) + top1.update(acc1[0], data.size(0)) + losses.update(loss.item(), data.size(0)) + tbar.set_description('\rLoss: %.3f | Top1: %.3f'%(losses.avg, top1.avg)) - errlist_train += [err] + acclist_train += [top1.avg] - def test(epoch): + def validate(epoch): model.eval() - global best_pred, errlist_train, errlist_val - test_loss, correct, total = 0,0,0 + top1 = AverageMeter() + top5 = AverageMeter() + global best_pred, acclist_train, acclist_val is_best = False - tbar = tqdm(test_loader, desc='\r') + tbar = tqdm(val_loader, desc='\r') for batch_idx, (data, target) in enumerate(tbar): if args.cuda: data, target = data.cuda(), target.cuda() - data, target = Variable(data), Variable(target) with torch.no_grad(): output = model(data) - test_loss += criterion(output, target).data.item() - # get the index of the max log-probability - pred = output.data.max(1)[1] - correct += pred.eq(target.data).cpu().sum().item() - total += target.size(0) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + top1.update(acc1[0], data.size(0)) + 
top5.update(acc5[0], data.size(0)) - err = 100.0 - 100.0 * correct / total - tbar.set_description('Loss: %.3f | Err: %.3f%% (%d/%d)'% \ - (test_loss/(batch_idx+1), err, total-correct, total)) + tbar.set_description('Top1: %.3f | Top5: %.3f'%(top1.avg, top5.avg)) if args.eval: - print('Error rate is %.3f'%err) + print('Top1 Acc: %.3f | Top5 Acc: %.3f '%(top1.avg, top5.avg)) return # save checkpoint - errlist_val += [err] - if err < best_pred: - best_pred = err + acclist_val += [top1.avg] + if top1.avg > best_pred: + best_pred = top1.avg is_best = True - save_checkpoint({ + encoding.utils.save_checkpoint({ 'epoch': epoch, - 'state_dict': model.state_dict(), + 'state_dict': model.module.state_dict(), 'optimizer': optimizer.state_dict(), 'best_pred': best_pred, - 'errlist_train':errlist_train, - 'errlist_val':errlist_val, + 'acclist_train':acclist_train, + 'acclist_val':acclist_val, }, args=args, is_best=is_best) - if args.plot: - plot.clf() - plot.xlabel('Epoches: ') - plot.ylabel('Error Rate: %') - plot.plot(errlist_train, label='train') - plot.plot(errlist_val, label='val') - plot.legend(loc='upper left') - plot.draw() - plot.pause(0.001) if args.eval: - test(args.start_epoch) + validate(args.start_epoch) return for epoch in range(args.start_epoch, args.epochs + 1): train(epoch) - test(epoch) - - # save train_val curve to a file - if args.plot: - plot.clf() - plot.xlabel('Epoches: ') - plot.ylabel('Error Rate: %') - plot.plot(errlist_train, label='train') - plot.plot(errlist_val, label='val') - plot.savefig("runs/%s/%s/"%(args.dataset, args.checkname) - +'train_val.jpg') + validate(epoch) + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count if __name__ == "__main__": main() diff --git a/experiments/recognition/model/deepten.py b/experiments/recognition/model/deepten.py index ec27a287..82a5c8c3 100644 --- a/experiments/recognition/model/deepten.py +++ b/experiments/recognition/model/deepten.py @@ -14,7 +14,7 @@ from torch.autograd import Variable import encoding -import encoding.dilated.resnet as resnet +import encoding.models.resnet as resnet class Net(nn.Module): def __init__(self, args): diff --git a/experiments/recognition/model/download_models.py b/experiments/recognition/model/download_models.py deleted file mode 100644 index 131e6882..00000000 --- a/experiments/recognition/model/download_models.py +++ /dev/null @@ -1,5 +0,0 @@ -import encoding -import shutil - -encoding.models.get_model_file('deepten_minc', root='./') -shutil.move('deepten_minc-2e22611a.pth', 'deepten_minc.pth') diff --git a/experiments/recognition/option.py b/experiments/recognition/option.py index 3f8eef79..b588d1be 100644 --- a/experiments/recognition/option.py +++ b/experiments/recognition/option.py @@ -20,6 +20,8 @@ def __init__(self): # model params 
        parser.add_argument('--model', type=str, default='densenet',
                            help='network model type (default: densenet)')
+        parser.add_argument('--pretrained', action='store_true',
+                            default=False, help='load pretrained model')
         parser.add_argument('--nclass', type=int, default=10, metavar='N',
                             help='number of classes (default: 10)')
         parser.add_argument('--widen', type=int, default=4, metavar='N',
@@ -36,7 +38,9 @@ def __init__(self):
         parser.add_argument('--epochs', type=int, default=600, metavar='N',
                             help='number of epochs to train (default: 600)')
         parser.add_argument('--start_epoch', type=int, default=1,
-                            metavar='N', help='the epoch number to start (default: 0)')
+                            metavar='N', help='the epoch number to start (default: 1)')
+        parser.add_argument('--workers', type=int, default=16,
+                            metavar='N', help='dataloader threads')
         # lr setting
         parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                             help='learning rate (default: 0.1)')
@@ -47,8 +51,8 @@ def __init__(self):
         # optimizer
         parser.add_argument('--momentum', type=float, default=0.9,
                             metavar='M', help='SGD momentum (default: 0.9)')
-        parser.add_argument('--weight-decay', type=float, default=5e-4,
-                            metavar ='M', help='SGD weight decay (default: 5e-4)')
+        parser.add_argument('--weight-decay', type=float, default=1e-4,
+                            metavar ='M', help='SGD weight decay (default: 1e-4)')
         # cuda, seed and logging
         parser.add_argument('--no-cuda', action='store_true',
                             default=False, help='disables CUDA training')
diff --git a/experiments/segmentation/option.py b/experiments/segmentation/option.py
index 4d9e3c6a..9fc5c326 100644
--- a/experiments/segmentation/option.py
+++ b/experiments/segmentation/option.py
@@ -30,7 +30,7 @@ def __init__(self):
         parser.add_argument('--crop-size', type=int, default=480,
                             help='crop image size')
         parser.add_argument('--train-split', type=str, default='train',
-                            help='dataset train split (default: train)')
+                            help='dataset train split (default: train)')
         # training hyper params
         parser.add_argument('--aux', action='store_true', default= False,
                             help='Auxilary Loss')
@@ -44,10 +44,10 @@ def __init__(self):
                             help='number of epochs to train (default: auto)')
         parser.add_argument('--start_epoch', type=int, default=0,
                             metavar='N', help='start epochs (default:0)')
-        parser.add_argument('--batch-size', type=int, default=None,
+        parser.add_argument('--batch-size', type=int, default=16,
                             metavar='N', help='input batch size for \
                             training (default: auto)')
-        parser.add_argument('--test-batch-size', type=int, default=None,
+        parser.add_argument('--test-batch-size', type=int, default=16,
                             metavar='N', help='input batch size for \
                             testing (default: same as batch size)')
         # optimizer params
@@ -77,6 +77,8 @@ def __init__(self):
         # evaluation option
         parser.add_argument('--eval', action='store_true', default= False,
                             help='evaluating mIoU')
+        parser.add_argument('--test-val', action='store_true', default= False,
+                            help='generate masks on val set')
         parser.add_argument('--no-val', action='store_true', default= False,
                             help='skip validation during training')
         # test option
@@ -92,25 +94,21 @@ def parse(self):
         if args.epochs is None:
             epoches = {
                 'coco': 30,
-                'citys': 240,
+                'pascal_aug': 80,
                 'pascal_voc': 50,
-                'pascal_aug': 50,
                 'pcontext': 80,
-                'ade20k': 120,
+                'ade20k': 180,
+                'citys': 240,
             }
             args.epochs = epoches[args.dataset.lower()]
-        if args.batch_size is None:
-            args.batch_size = 16
-        if args.test_batch_size is None:
-            args.test_batch_size = args.batch_size
         if args.lr is None:
             lrs = {
-                'coco': 0.01,
-                'citys': 0.01,
-                'pascal_voc': 0.0001,
+                'coco':
0.004, 'pascal_aug': 0.001, + 'pascal_voc': 0.0001, 'pcontext': 0.001, - 'ade20k': 0.01, + 'ade20k': 0.004, + 'citys': 0.004, } args.lr = lrs[args.dataset.lower()] / 16 * args.batch_size print(args) diff --git a/experiments/segmentation/test.py b/experiments/segmentation/test.py index 6e2fe89f..2772946f 100644 --- a/experiments/segmentation/test.py +++ b/experiments/segmentation/test.py @@ -14,7 +14,7 @@ from torch.nn.parallel.scatter_gather import gather import encoding.utils as utils -from encoding.nn import SegmentationLosses, BatchNorm2d +from encoding.nn import SegmentationLosses, SyncBatchNorm from encoding.parallel import DataParallelModel, DataParallelCriterion from encoding.datasets import get_segmentation_dataset, test_batchify_fn from encoding.models import get_model, get_segmentation_model, MultiEvalModule @@ -34,6 +34,9 @@ def test(args): if args.eval: testset = get_segmentation_dataset(args.dataset, split='val', mode='testval', transform=input_transform) + elif args.test_val: + testset = get_segmentation_dataset(args.dataset, split='val', mode='test', + transform=input_transform) else: testset = get_segmentation_dataset(args.dataset, split='test', mode='test', transform=input_transform) @@ -46,10 +49,12 @@ def test(args): # model if args.model_zoo is not None: model = get_model(args.model_zoo, pretrained=True) + #model.base_size = args.base_size + #model.crop_size = args.crop_size else: model = get_segmentation_model(args.model, dataset=args.dataset, backbone = args.backbone, aux = args.aux, - se_loss = args.se_loss, norm_layer = BatchNorm2d, + se_loss = args.se_loss, norm_layer = SyncBatchNorm, base_size=args.base_size, crop_size=args.crop_size) # resuming checkpoint if args.resume is None or not os.path.isfile(args.resume): @@ -60,8 +65,8 @@ def test(args): print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) print(model) - scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ - [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] + scales = [0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ + [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0] evaluator = MultiEvalModule(model, testset.num_class, scales=scales).cuda() evaluator.eval() metric = utils.SegmentationMetric(testset.num_class) diff --git a/experiments/segmentation/test_models.py b/experiments/segmentation/test_models.py index c9ef2698..a9ea7398 100644 --- a/experiments/segmentation/test_models.py +++ b/experiments/segmentation/test_models.py @@ -7,6 +7,7 @@ if __name__ == "__main__": args = Options().parse() model = encoding.models.get_segmentation_model(args.model, dataset=args.dataset, aux=args.aux, + backbone=args.backbone, se_loss=args.se_loss, norm_layer=torch.nn.BatchNorm2d) print('Creating the model:') diff --git a/experiments/segmentation/train.py b/experiments/segmentation/train.py index a3f3345d..135c4f38 100644 --- a/experiments/segmentation/train.py +++ b/experiments/segmentation/train.py @@ -15,9 +15,9 @@ from torch.nn.parallel.scatter_gather import gather import encoding.utils as utils -from encoding.nn import SegmentationLosses, BatchNorm2d +from encoding.nn import SegmentationLosses, SyncBatchNorm, OHEMSegmentationLosses from encoding.parallel import DataParallelModel, DataParallelCriterion -from encoding.datasets import get_segmentation_dataset +from encoding.datasets import get_dataset from encoding.models import get_segmentation_model from option import Options @@ -36,9 +36,9 @@ def __init__(self, args): # dataset data_kwargs = 
{'transform': input_transform, 'base_size': args.base_size, 'crop_size': args.crop_size} - trainset = get_segmentation_dataset(args.dataset, split=args.train_split, mode='train', + trainset = get_dataset(args.dataset, split=args.train_split, mode='train', **data_kwargs) - testset = get_segmentation_dataset(args.dataset, split='val', mode ='val', + testset = get_dataset(args.dataset, split='val', mode ='val', **data_kwargs) # dataloader kwargs = {'num_workers': args.workers, 'pin_memory': True} \ @@ -51,7 +51,7 @@ def __init__(self, args): # model model = get_segmentation_model(args.model, dataset=args.dataset, backbone = args.backbone, aux = args.aux, - se_loss = args.se_loss, norm_layer = BatchNorm2d, + se_loss = args.se_loss, norm_layer = SyncBatchNorm, base_size=args.base_size, crop_size=args.crop_size) print(model) # optimizer using different LR @@ -63,7 +63,8 @@ def __init__(self, args): optimizer = torch.optim.SGD(params_list, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) # criterions - self.criterion = SegmentationLosses(se_loss=args.se_loss, aux=args.aux, + self.criterion = SegmentationLosses(se_loss=args.se_loss, + aux=args.aux, nclass=self.nclass, se_weight=args.se_weight, aux_weight=args.aux_weight) @@ -160,12 +161,12 @@ def eval_batch(model, image, target): if new_pred > self.best_pred: is_best = True self.best_pred = new_pred - utils.save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': self.model.module.state_dict(), - 'optimizer': self.optimizer.state_dict(), - 'best_pred': self.best_pred, - }, self.args, is_best) + utils.save_checkpoint({ + 'epoch': epoch + 1, + 'state_dict': self.model.module.state_dict(), + 'optimizer': self.optimizer.state_dict(), + 'best_pred': self.best_pred, + }, self.args, is_best) if __name__ == "__main__": @@ -174,7 +175,10 @@ def eval_batch(model, image, target): trainer = Trainer(args) print('Starting Epoch:', trainer.args.start_epoch) print('Total Epoches:', trainer.args.epochs) - for epoch in range(trainer.args.start_epoch, trainer.args.epochs): - trainer.training(epoch) - if not trainer.args.no_val: - trainer.validation(epoch) + if args.eval: + trainer.validation(trainer.args.start_epoch) + else: + for epoch in range(trainer.args.start_epoch, trainer.args.epochs): + trainer.training(epoch) + if not trainer.args.no_val: + trainer.validation(epoch) diff --git a/scripts/prepare_cityscapes.py b/scripts/prepare_cityscapes.py deleted file mode 100644 index 19722cb8..00000000 --- a/scripts/prepare_cityscapes.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Prepare ADE20K dataset""" -import os -import shutil -import argparse -import zipfile -from encoding.utils import download, mkdir, check_sha1 - -_TARGET_DIR = os.path.expanduser('~/.encoding/data') - -def parse_args(): - parser = argparse.ArgumentParser( - description='Initialize ADE20K dataset.', - epilog='Example: python prepare_cityscapes.py', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--download-dir', default=None, help='dataset directory on disk') - args = parser.parse_args() - return args - -def download_city(path, overwrite=False): - _CITY_DOWNLOAD_URLS = [ - #('gtCoarse.zip', '61f23198bfff5286e0d7e316ad5c4dbbaaf4717a'), - ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), - ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] - download_dir = os.path.join(path, 'downloads') - mkdir(download_dir) - for filename, checksum in _CITY_DOWNLOAD_URLS: - if not check_sha1(filename, checksum): - raise 
UserWarning('File {} is downloaded but the content hash does not match. ' \ - 'The repo may be outdated or download may be incomplete. ' \ - 'If the "repo_url" is overridden, consider switching to ' \ - 'the default repo.'.format(filename)) - # extract - with zipfile.ZipFile(filename,"r") as zip_ref: - zip_ref.extractall(path=path) - print("Extracted", filename) - -if __name__ == '__main__': - args = parse_args() - mkdir(os.path.expanduser('~/.encoding/data')) - mkdir(os.path.expanduser('~/.encoding/data/cityscapes')) - if args.download_dir is not None: - if os.path.isdir(_TARGET_DIR): - os.remove(_TARGET_DIR) - # make symlink - os.symlink(args.download_dir, _TARGET_DIR) - else: - download_city(_TARGET_DIR, overwrite=False) - diff --git a/scripts/prepare_coco.py b/scripts/prepare_coco.py index ecbe5c35..0a6053c2 100644 --- a/scripts/prepare_coco.py +++ b/scripts/prepare_coco.py @@ -20,21 +20,28 @@ def download_coco(path, overwrite=False): _DOWNLOAD_URLS = [ ('http://images.cocodataset.org/zips/train2017.zip', '10ad623668ab00c62c096f0ed636d6aff41faca5'), - ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', - '8551ee4bb5860311e79dace7e79cb91e432e78b3'), ('http://images.cocodataset.org/zips/val2017.zip', '4950dc9d00dbe1c933ee0170f5797584351d2a41'), + ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + '8551ee4bb5860311e79dace7e79cb91e432e78b3'), #('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', # '46cdcf715b6b4f67e980b529534e79c2edffe084'), #('http://images.cocodataset.org/zips/test2017.zip', # '99813c02442f3c112d491ea6f30cecf421d0e6b3'), + ('https://hangzh.s3.amazonaws.com/encoding/data/coco/train_ids.pth', + '12cd266f97c8d9ea86e15a11f11bcb5faba700b6'), + ('https://hangzh.s3.amazonaws.com/encoding/data/coco/val_ids.pth', + '4ce037ac33cbf3712fd93280a1c5e92dae3136bb'), ] mkdir(path) for url, checksum in _DOWNLOAD_URLS: filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract - with zipfile.ZipFile(filename) as zf: - zf.extractall(path=path) + if os.path.splitext(filename)[1] == '.zip': + with zipfile.ZipFile(filename) as zf: + zf.extractall(path=path) + else: + shutil.move(filename, os.path.join(path, 'annotations/'+os.path.basename(filename))) def install_coco_api(): diff --git a/scripts/prepare_imagenet.py b/scripts/prepare_imagenet.py new file mode 100644 index 00000000..e51df53e --- /dev/null +++ b/scripts/prepare_imagenet.py @@ -0,0 +1,131 @@ +"""Prepare the ImageNet dataset""" +import os +import argparse +import tarfile +import pickle +import gzip +import subprocess +from tqdm import tqdm +from encoding.utils import check_sha1, download, mkdir + +_TARGET_DIR = os.path.expanduser('~/.encoding/datasets/imagenet') +_TRAIN_TAR = 'ILSVRC2012_img_train.tar' +_TRAIN_TAR_SHA1 = '43eda4fe35c1705d6606a6a7a633bc965d194284' +_VAL_TAR = 'ILSVRC2012_img_val.tar' +_VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178' + +def parse_args(): + parser = argparse.ArgumentParser( + description='Setup the ImageNet dataset.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--download-dir', required=True, + help="The directory that contains downloaded tar files") + parser.add_argument('--target-dir', default=_TARGET_DIR, + help="The directory to store extracted images") + parser.add_argument('--checksum', action='store_true', + help="If check integrity before extracting.") + parser.add_argument('--with-rec', action='store_true', + help="If build image record 
files.") + parser.add_argument('--num-thread', type=int, default=1, + help="Number of threads to use when building image record file.") + args = parser.parse_args() + return args + +def check_file(filename, checksum, sha1): + if not os.path.exists(filename): + raise ValueError('File not found: '+filename) + if checksum and not check_sha1(filename, sha1): + raise ValueError('Corrupted file: '+filename) + +def build_rec_process(img_dir, train=False, num_thread=1): + rec_dir = os.path.abspath(os.path.join(img_dir, '../rec')) + mkdir(rec_dir) + prefix = 'train' if train else 'val' + print('Building ImageRecord file for ' + prefix + ' ...') + to_path = rec_dir + + # download lst file and im2rec script + script_path = os.path.join(rec_dir, 'im2rec.py') + script_url = 'https://raw.githubusercontent.com/apache/incubator-encoding/master/tools/im2rec.py' + download(script_url, script_path) + + lst_path = os.path.join(rec_dir, prefix + '.lst') + lst_url = 'http://data.encoding.io/models/imagenet/resnet/' + prefix + '.lst' + download(lst_url, lst_path) + + # execution + import sys + cmd = [ + sys.executable, + script_path, + rec_dir, + img_dir, + '--recursive', + '--pass-through', + '--pack-label', + '--num-thread', + str(num_thread) + ] + subprocess.call(cmd) + os.remove(script_path) + os.remove(lst_path) + print('ImageRecord file for ' + prefix + ' has been built!') + +def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1): + os.makedirs(target_dir) + with tarfile.open(tar_fname) as tar: + print("Extracting "+tar_fname+"...") + # extract each class one-by-one + pbar = tqdm(total=len(tar.getnames())) + for class_tar in tar: + pbar.set_description('Extract '+class_tar.name) + tar.extract(class_tar, target_dir) + class_fname = os.path.join(target_dir, class_tar.name) + class_dir = os.path.splitext(class_fname)[0] + os.mkdir(class_dir) + with tarfile.open(class_fname) as f: + f.extractall(class_dir) + os.remove(class_fname) + pbar.update(1) + pbar.close() + if with_rec: + build_rec_process(target_dir, True, num_thread) + +def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1): + os.makedirs(target_dir) + print('Extracting ' + tar_fname) + with tarfile.open(tar_fname) as tar: + tar.extractall(target_dir) + # build rec file before images are moved into subfolders + if with_rec: + build_rec_process(target_dir, False, num_thread) + # move images to proper subfolders + val_maps_file = os.path.join(os.path.dirname(__file__), 'imagenet_val_maps.pklz') + with gzip.open(val_maps_file, 'rb') as f: + dirs, mappings = pickle.load(f) + for d in dirs: + os.makedirs(os.path.join(target_dir, d)) + for m in mappings: + os.rename(os.path.join(target_dir, m[0]), os.path.join(target_dir, m[1], m[0])) + +def main(): + args = parse_args() + + target_dir = os.path.expanduser(args.target_dir) + if os.path.exists(target_dir): + raise ValueError('Target dir ['+target_dir+'] exists. 
Remove it first') + + download_dir = os.path.expanduser(args.download_dir) + train_tar_fname = os.path.join(download_dir, _TRAIN_TAR) + check_file(train_tar_fname, args.checksum, _TRAIN_TAR_SHA1) + val_tar_fname = os.path.join(download_dir, _VAL_TAR) + check_file(val_tar_fname, args.checksum, _VAL_TAR_SHA1) + + build_rec = args.with_rec + if build_rec: + os.makedirs(os.path.join(target_dir, 'rec')) + extract_train(train_tar_fname, os.path.join(target_dir, 'train'), build_rec, args.num_thread) + extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, args.num_thread) + +if __name__ == '__main__': + main() diff --git a/scripts/prepare_pcontext.py b/scripts/prepare_pcontext.py index eecf81ff..9341a5d2 100644 --- a/scripts/prepare_pcontext.py +++ b/scripts/prepare_pcontext.py @@ -23,7 +23,6 @@ def download_ade(path, overwrite=False): 'bf9985e9f2b064752bf6bd654d89f017c76c395a'), ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json', '169325d9f7e9047537fedca7b04de4dddf10b881'), - # You can skip these if the network is slow, the dataset will automatically generate them. ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth', '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'), ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth', diff --git a/setup.py b/setup.py index 5a095786..263bd5f8 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ cwd = os.path.dirname(os.path.abspath(__file__)) -version = '0.5.1' +version = '1.0.1' try: sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip() diff --git a/tests/unit_test/test_function.py b/tests/unit_test/test_function.py index c29f0c67..826b4a25 100644 --- a/tests/unit_test/test_function.py +++ b/tests/unit_test/test_function.py @@ -173,13 +173,13 @@ def mahalanobis_dist(X, C, STD): test = gradcheck(encoding.functions.encoding_dist_inference, input, eps=EPS, atol=ATOL) print('Testing encoding_dist_inference(): {}'.format(test)) -def test_sum_square(): +def test_moments(): B,C,H = 2,3,4 X = Variable(torch.cuda.DoubleTensor(B,C,H).uniform_(-0.5,0.5), requires_grad=True) input = (X,) - test = gradcheck(encoding.functions.sum_square, input, eps=EPS, atol=ATOL) - print('Testing sum_square(): {}'.format(test)) + test = gradcheck(encoding.functions.moments, input, eps=EPS, atol=ATOL) + print('Testing moments(): {}'.format(test)) def test_syncbn_func(): # generate input diff --git a/tests/unit_test/test_module.py b/tests/unit_test/test_module.py index 90d05793..77d1bc8b 100644 --- a/tests/unit_test/test_module.py +++ b/tests/unit_test/test_module.py @@ -49,7 +49,7 @@ def _check_batchnorm_result(bn1, bn2, input, is_train, cuda=False): def _find_bn(module): for m in module.modules(): if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, - encoding.nn.BatchNorm1d, encoding.nn.BatchNorm2d)): + encoding.nn.SyncBatchNorm)): return m def _syncParameters(bn1, bn2): bn1.reset_parameters() @@ -70,29 +70,128 @@ def _syncParameters(bn1, bn2): input1 = Variable(input.clone().detach(), requires_grad=True) input2 = Variable(input.clone().detach(), requires_grad=True) - output1 = bn1(input1) - output2 = bn2(input2) + if is_train: + bn1.train() + bn2.train() + output1 = bn1(input1) + output2 = bn2(input2) + else: + bn1.eval() + bn2.eval() + with torch.no_grad(): + output1 = bn1(input1) + output2 = bn2(input2) # assert forwarding - _assert_tensor_close(input1.data, input2.data) + #_assert_tensor_close(input1.data, input2.data) _assert_tensor_close(output1.data, output2.data) if not 
is_train: return (output1 ** 2).sum().backward() (output2 ** 2).sum().backward() + _assert_tensor_close(_find_bn(bn1).bias.grad.data, _find_bn(bn2).bias.grad.data) + _assert_tensor_close(_find_bn(bn1).weight.grad.data, _find_bn(bn2).weight.grad.data) _assert_tensor_close(input1.grad.data, input2.grad.data) _assert_tensor_close(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean) - _assert_tensor_close(_find_bn(bn1).running_var, _find_bn(bn2).running_var) - + #_assert_tensor_close(_find_bn(bn1).running_var, _find_bn(bn2).running_var) bn = torch.nn.BatchNorm2d(10).cuda().double() - sync_bn = encoding.nn.BatchNorm2d(10).double() + sync_bn = encoding.nn.SyncBatchNorm(10, inplace=True, sync=True).cuda().double() sync_bn = torch.nn.DataParallel(sync_bn).cuda() - encoding.parallel.patch_replication_callback(sync_bn) # check with unsync version + #_check_batchnorm_result(bn, sync_bn, torch.rand(2, 1, 2, 2).double(), True, cuda=True) for i in range(10): print(i) _check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), True, cuda=True) - _check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), False, cuda=True) + #_check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), False, cuda=True) + + +def testABN(): + class NormAct(torch.nn.BatchNorm2d): + def __init__(self, num_features, eps=1e-5, momentum=0.1, sync=True, activation="none", + slope=0.01): + super(NormAct, self).__init__(num_features, eps=eps, momentum=momentum, affine=True) + self.slope = slope + + def forward(self, x): + exponential_average_factor = 0.0 + if self.training and self.track_running_stats: + self.num_batches_tracked += 1 + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / self.num_batches_tracked.item() + else: # use exponential moving average + exponential_average_factor = self.momentum + + y = torch.nn.functional.batch_norm( + x, self.running_mean, self.running_var, self.weight, self.bias, + self.training or not self.track_running_stats, + exponential_average_factor, self.eps) + return torch.nn.functional.leaky_relu_(y, self.slope) + + def _check_batchnorm_result(bn1, bn2, input, is_train, cuda=False): + def _find_bn(module): + for m in module.modules(): + if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, + encoding.nn.SyncBatchNorm)): + return m + def _syncParameters(bn1, bn2): + bn1.reset_parameters() + bn2.reset_parameters() + if bn1.affine and bn2.affine: + bn2.weight.data.copy_(bn1.weight.data) + bn2.bias.data.copy_(bn1.bias.data) + bn2.running_mean.copy_(bn1.running_mean) + bn2.running_var.copy_(bn1.running_var) + + bn1.train(mode=is_train) + bn2.train(mode=is_train) + + if cuda: + input = input.cuda() + # using the same values for gamma and beta + _syncParameters(_find_bn(bn1), _find_bn(bn2)) + + input1 = Variable(input.clone().detach(), requires_grad=True) + input2 = Variable(input.clone().detach(), requires_grad=True) + if is_train: + bn1.train() + bn2.train() + output1 = bn1(input1) + output2 = bn2(input2) + else: + bn1.eval() + bn2.eval() + with torch.no_grad(): + output1 = bn1(input1) + output2 = bn2(input2) + # assert forwarding + _assert_tensor_close(output1.data, output2.data) + if not is_train: + return + loss1 = (output1 ** 2).sum() + loss2 = (output2 ** 2).sum() + loss1.backward() + loss2.backward() + _assert_tensor_close(_find_bn(bn1).bias.grad.data, _find_bn(bn2).bias.grad.data) + _assert_tensor_close(_find_bn(bn1).weight.grad.data, _find_bn(bn2).weight.grad.data) + 
_assert_tensor_close(input1.grad.data, input2.grad.data) + _assert_tensor_close(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean) + + bn = NormAct(10).cuda().double() + inp_abn = encoding.nn.SyncBatchNorm(10, sync=False, activation='leaky_relu', inplace=True).cuda().double() + inp_abn = torch.nn.DataParallel(inp_abn).cuda() + # check with unsync version + for i in range(10): + print(i) + _check_batchnorm_result(bn, inp_abn, torch.rand(16, 10, 16, 16).double(), True, cuda=True) + #_check_batchnorm_result(bn, inp_abn, torch.rand(16, 10, 16, 16).double(), False, cuda=True) + + +def test_Atten_Module(): + B, C, H, W = 8, 24, 10, 10 + X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), + requires_grad=True) + layer1 = encoding.nn.MultiHeadAttention(4, 24, 24, 24).double().cuda() + Y = layer1(X) if __name__ == '__main__': import nose
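# A minimal sketch of the SyncBatchNorm API these tests exercise, assuming a
# machine with multiple CUDA devices (it mirrors the updated docstring example
# in encoding/nn):
#
#   import torch
#   import encoding
#
#   m = encoding.nn.SyncBatchNorm(10, sync=True, activation='none').cuda()
#   net = torch.nn.DataParallel(m)  # patch_replication_callback is gone
#   y = net(torch.rand(16, 10, 16, 16).cuda())  # same shape out, synced stats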