diff --git a/.dev/batch_test_list.py b/.dev/batch_test_list.py index c4fd8f97e4..0d096ed943 100644 --- a/.dev/batch_test_list.py +++ b/.dev/batch_test_list.py @@ -2,25 +2,25 @@ # Inference Speed is tested on NVIDIA V100 hrnet = [ dict( - config='configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py', + config='configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py', checkpoint='fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth', # noqa eval='mIoU', metric=dict(mIoU=33.0), ), dict( - config='configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py', + config='configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py', checkpoint='fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth', # noqa eval='mIoU', metric=dict(mIoU=76.31), ), dict( - config='configs/hrnet/fcn_hr48_512x512_160k_ade20k.py', + config='configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py', checkpoint='fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth', eval='mIoU', metric=dict(mIoU=42.02), ), dict( - config='configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py', + config='configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py', checkpoint='fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth', # noqa eval='mIoU', metric=dict(mIoU=80.65), @@ -28,25 +28,25 @@ ] pspnet = [ dict( - config='configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py', + config='configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py', checkpoint='pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth', # noqa eval='mIoU', metric=dict(mIoU=78.55), ), dict( - config='configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py', + config='configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py', checkpoint='pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth', # noqa eval='mIoU', metric=dict(mIoU=79.76), ), dict( - config='configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py', + config='configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py', 
checkpoint='pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth', # noqa eval='mIoU', metric=dict(mIoU=44.39), ), dict( - config='configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py', + config='configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py', checkpoint='pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth', # noqa eval='mIoU', metric=dict(mIoU=42.48), @@ -54,13 +54,13 @@ ] resnest = [ dict( - config='configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py', + config='configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py', # noqa checkpoint='pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth', # noqa eval='mIoU', metric=dict(mIoU=45.44), ), dict( - config='configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py', + config='configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py', # noqa checkpoint='pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth', # noqa eval='mIoU', metric=dict(mIoU=78.57), @@ -68,7 +68,7 @@ ] fastscnn = [ dict( - config='configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py', + config='configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py', checkpoint='fast_scnn_8x4_160k_lr0.12_cityscapes-0cec9937.pth', eval='mIoU', metric=dict(mIoU=70.96), @@ -76,25 +76,25 @@ ] deeplabv3plus = [ dict( - config='configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py', # noqa + config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py', # noqa checkpoint='deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth', # noqa eval='mIoU', metric=dict(mIoU=80.98), ), dict( - config='configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py', # noqa + config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py', # noqa checkpoint='deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth', # noqa eval='mIoU', metric=dict(mIoU=80.97), ), dict( - 
config='configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py', # noqa + config='configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py', # noqa checkpoint='deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth', # noqa eval='mIoU', metric=dict(mIoU=80.09), ), dict( - config='configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py', # noqa + config='configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py', # noqa checkpoint='deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth', # noqa eval='mIoU', metric=dict(mIoU=79.83), @@ -102,13 +102,13 @@ ] vit = [ dict( - config='configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py', + config='configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py', # noqa checkpoint='upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth', eval='mIoU', metric=dict(mIoU=47.73), ), dict( - config='configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py', + config='configs/vit/vit_deit-s16-ln_mln_upernet_512x512_160k_ade20k-512x512.py', # noqa checkpoint='upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth', eval='mIoU', metric=dict(mIoU=43.52), @@ -116,7 +116,7 @@ ] fp16 = [ dict( - config='configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py', # noqa + config='configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py', # noqa checkpoint='deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth', # noqa eval='mIoU', metric=dict(mIoU=80.46), @@ -124,7 +124,7 @@ ] swin = [ dict( - config='configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py', # noqa + config='configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py', # noqa checkpoint='upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth', # noqa eval='mIoU', metric=dict(mIoU=44.41), diff 
--git a/.dev/batch_train_list.txt b/.dev/batch_train_list.txt index 17d19932e6..6c1a122dc4 100644 --- a/.dev/batch_train_list.txt +++ b/.dev/batch_train_list.txt @@ -1,19 +1,19 @@ -configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py -configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py -configs/hrnet/fcn_hr48_512x512_160k_ade20k.py -configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py -configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py -configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py -configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py -configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py -configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py -configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py -configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py -configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py -configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py -configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py -configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py -configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py -configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py -configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py -configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py +configs/hrnet/fcn_hr18s_4xb4-160k_ade20k-512x512.py +configs/hrnet/fcn_hr18s_4xb2-160k_cityscapes-512x1024.py +configs/hrnet/fcn_hr48_4xb4-160k_ade20k-512x512.py +configs/hrnet/fcn_hr48_4xb2-160k_cityscapes-512x1024.py +configs/pspnet/pspnet_r50-d8_4xb2-80k_cityscapes-512x1024.py +configs/pspnet/pspnet_r101-d8_4xb2-80k_cityscapes-512x1024.py +configs/pspnet/pspnet_r101-d8_4xb4-160k_ade20k-512x512.py +configs/pspnet/pspnet_r50-d8_4xb4-160k_ade20k-512x512.py +configs/resnest/resnest_s101-d8_pspnet_4xb4-160k_ade20k-512x512.py +configs/resnest/resnest_s101-d8_pspnet_4xb2-80k_cityscapes512x1024.py 
+configs/fastscnn/fast_scnn_8xb4-160k_cityscapes-512x1024.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-769x769.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024.py +configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-512x1024.py +configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-80k_cityscapes-769x769.py +configs/vit/vit_vit-b16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py +configs/vit/vit_deit-s16-ln_mln_upernet_8xb2-160k_ade20k-512x512.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_4xb2-amp-80k_cityscapes-512x1024.py +configs/swin/swin-tiny-patch4-window7-in1k-pre_upernet_8xb2-160k_ade20k-512x512.py diff --git a/README.md b/README.md index 308fca8716..9b4a580f39 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,6 @@
 
- -
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmsegmentation)](https://pypi.org/project/mmsegmentation/) [![PyPI](https://img.shields.io/pypi/v/mmsegmentation)](https://pypi.org/project/mmsegmentation) @@ -33,6 +31,22 @@ Documentation: English | [简体中文](README_zh-CN.md) + + +
+ + + + + + + + + + + +
+ ## Introduction MMSegmentation is an open source semantic segmentation toolbox based on PyTorch. @@ -62,11 +76,11 @@ The 1.x branch works with **PyTorch 1.6+**. ## What's New -v1.0.0rc5 was released on 01/02/2023. +v1.0.0rc6 was released on 03/03/2023. Please refer to [changelog.md](docs/en/notes/changelog.md) for details and release history. -- Support ISNet (ICCV'2021) in projects ([#2400](https://github.com/open-mmlab/mmsegmentation/pull/2400)) -- Support HSSN (CVPR'2022) in projects ([#2444](https://github.com/open-mmlab/mmsegmentation/pull/2444)) +- Support MMSegInferencer ([#2413](https://github.com/open-mmlab/mmsegmentation/pull/2413), [#2658](https://github.com/open-mmlab/mmsegmentation/pull/2658)) +- Support REFUGE dataset ([#2554](https://github.com/open-mmlab/mmsegmentation/pull/2554)) ## Installation @@ -81,13 +95,14 @@ There are also [advanced tutorials](https://mmsegmentation.readthedocs.io/en/dev A Colab tutorial is also provided. You may preview the notebook [here](demo/MMSegmentation_Tutorial.ipynb) or directly [run](https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/1.x/demo/MMSegmentation_Tutorial.ipynb) on Colab. -To migrate from MMSegmentation 1.x, please refer to [migration](docs/en/migration.md). +To migrate from MMSegmentation 0.x, please refer to [migration](docs/en/migration). ## Benchmark and model zoo Results and models are available in the [model zoo](docs/en/model_zoo.md). -Supported backbones: +
+Supported backbones: - [x] ResNet (CVPR'2016) - [x] ResNeXt (CVPR'2017) @@ -103,7 +118,10 @@ Supported backbones: - [x] [MAE (CVPR'2022)](configs/mae) - [x] [PoolFormer (CVPR'2022)](configs/poolformer) -Supported methods: +
+ +
+Supported methods: - [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn) - [x] [ERFNet (T-ITS'2017)](configs/erfnet) @@ -142,7 +160,10 @@ Supported methods: - [x] [MaskFormer (NeurIPS'2021)](configs/maskformer) - [x] [Mask2Former (CVPR'2022)](configs/mask2former) -Supported datasets: +
+ +
+Supported datasets: - [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#cityscapes) - [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#pascal-voc) @@ -161,8 +182,14 @@ Supported datasets: - [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#isprs-vaihingen) - [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/en/user_guides/2_dataset_prepare.md#isaid) +
+ Please refer to [FAQ](docs/en/notes/faq.md) for frequently asked questions. +## Projects + +[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMSegmentation, which are supported and maintained by community users. These projects demonstrate the best practices based on MMSegmentation for research and product development. We welcome and appreciate all the contributions to the OpenMMLab ecosystem. + ## Contributing We appreciate all contributions to improve MMSegmentation. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guideline. @@ -191,7 +218,7 @@ If you find this project useful in your research, please consider cite: This project is released under the [Apache 2.0 license](LICENSE). -## Projects in OpenMMLab +## OpenMMLab Family - [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models - [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. diff --git a/README_zh-CN.md b/README_zh-CN.md index 8db2746413..858485fd54 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -61,7 +61,7 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O ## 更新日志 -最新版本 v1.0.0rc5 在 2023.02.01 发布。 +最新版本 v1.0.0rc6 在 2023.03.03 发布。 如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/en/notes/changelog.md)。 ## 安装 @@ -82,7 +82,8 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O 测试结果和模型可以在[模型库](docs/zh_cn/model_zoo.md)中找到。 -已支持的骨干网络: +
+已支持的骨干网络: - [x] ResNet (CVPR'2016) - [x] ResNeXt (CVPR'2017) @@ -98,7 +99,10 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O - [x] [MAE (CVPR'2022)](configs/mae) - [x] [PoolFormer (CVPR'2022)](configs/poolformer) -已支持的算法: +
+ +
+已支持的算法: - [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn) - [x] [ERFNet (T-ITS'2017)](configs/erfnet) @@ -137,7 +141,10 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O - [x] [MaskFormer (NeurIPS'2021)](configs/maskformer) - [x] [Mask2Former (CVPR'2022)](configs/mask2former) -已支持的数据集: +
+ +
+已支持的数据集: - [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#cityscapes) - [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#pascal-voc) @@ -156,15 +163,22 @@ MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 O - [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#isprs-vaihingen) - [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/1.x/docs/zh_cn/dataset_prepare.md#isaid) +
+ 如果遇到问题,请参考 [常见问题解答](docs/zh_cn/notes/faq.md)。 +## 社区项目 + +[这里](projects/README.md)有一些由社区用户支持和维护的基于 MMSegmentation 的 SOTA 模型和解决方案的实现。这些项目展示了基于 MMSegmentation 的研究和产品开发的最佳实践。 +我们欢迎并感谢对 OpenMMLab 生态系统的所有贡献。 + ## 贡献指南 我们感谢所有的贡献者为改进和提升 MMSegmentation 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。 ## 致谢 -MMSegmentation 是一个由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具,供用户复现已有算法并开发自己的新模型,从而不断为开源社区提供贡献。 +MMSegmentation 是一个由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具,供用户复现已有算法并开发自己的新模型,从而不断为开源社区提供贡献。 ## 引用 diff --git a/configs/_base_/datasets/ade20k.py b/configs/_base_/datasets/ade20k.py index 2c01b2ff59..48340d11ee 100644 --- a/configs/_base_/datasets/ade20k.py +++ b/configs/_base_/datasets/ade20k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/ade20k_640x640.py b/configs/_base_/datasets/ade20k_640x640.py index 866403b27f..c1f642da7f 100644 --- a/configs/_base_/datasets/ade20k_640x640.py +++ b/configs/_base_/datasets/ade20k_640x640.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/chase_db1.py b/configs/_base_/datasets/chase_db1.py index 62dd3b3cbe..ed47c2dbe5 100644 --- a/configs/_base_/datasets/chase_db1.py +++ b/configs/_base_/datasets/chase_db1.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff 
--git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py index b7d95c1ec0..b63a4cdfe7 100644 --- a/configs/_base_/datasets/cityscapes.py +++ b/configs/_base_/datasets/cityscapes.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/coco-stuff10k.py b/configs/_base_/datasets/coco-stuff10k.py index 9d3026bd4c..5d6bb12b97 100644 --- a/configs/_base_/datasets/coco-stuff10k.py +++ b/configs/_base_/datasets/coco-stuff10k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/coco-stuff164k.py b/configs/_base_/datasets/coco-stuff164k.py index c785e313ff..baf633f9d6 100644 --- a/configs/_base_/datasets/coco-stuff164k.py +++ b/configs/_base_/datasets/coco-stuff164k.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/drive.py b/configs/_base_/datasets/drive.py index 3bd6080aa7..6a3dd82c64 100644 --- a/configs/_base_/datasets/drive.py +++ b/configs/_base_/datasets/drive.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/hrf.py b/configs/_base_/datasets/hrf.py index b0ae34abe6..353d070472 100644 --- a/configs/_base_/datasets/hrf.py 
+++ b/configs/_base_/datasets/hrf.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/isaid.py b/configs/_base_/datasets/isaid.py index 8407e06ac9..5cd4309f6d 100644 --- a/configs/_base_/datasets/isaid.py +++ b/configs/_base_/datasets/isaid.py @@ -32,7 +32,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/loveda.py b/configs/_base_/datasets/loveda.py index 8ecc919654..b93bc74af1 100644 --- a/configs/_base_/datasets/loveda.py +++ b/configs/_base_/datasets/loveda.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_context_59.py b/configs/_base_/datasets/pascal_context_59.py index bb144dd202..7f31043ed0 100644 --- a/configs/_base_/datasets/pascal_context_59.py +++ b/configs/_base_/datasets/pascal_context_59.py @@ -28,7 +28,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_voc12.py b/configs/_base_/datasets/pascal_voc12.py index 0fa3d55764..5235ca9cfe 100644 --- a/configs/_base_/datasets/pascal_voc12.py +++ b/configs/_base_/datasets/pascal_voc12.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', 
backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/pascal_voc12_aug.py b/configs/_base_/datasets/pascal_voc12_aug.py index 8b358cc0cd..69c3654880 100644 --- a/configs/_base_/datasets/pascal_voc12_aug.py +++ b/configs/_base_/datasets/pascal_voc12_aug.py @@ -27,7 +27,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/potsdam.py b/configs/_base_/datasets/potsdam.py index 4439f41919..95f6039351 100644 --- a/configs/_base_/datasets/potsdam.py +++ b/configs/_base_/datasets/potsdam.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/refuge.py b/configs/_base_/datasets/refuge.py new file mode 100644 index 0000000000..79bb4d4e94 --- /dev/null +++ b/configs/_base_/datasets/refuge.py @@ -0,0 +1,90 @@ +# dataset settings +dataset_type = 'REFUGEDataset' +data_root = 'data/REFUGE' +train_img_scale = (2056, 2124) +val_img_scale = (1634, 1634) +test_img_scale = (1634, 1634) +crop_size = (512, 512) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=False), + dict( + type='RandomResize', + scale=train_img_scale, + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=val_img_scale, keep_ratio=True), + # add loading annotation after 
``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=False), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=test_img_scale, keep_ratio=True), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations', reduce_zero_label=False), + dict(type='PackSegInputs') +] +img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] +tta_pipeline = [ + dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='Resize', scale_factor=r, keep_ratio=True) + for r in img_ratios + ], + [ + dict(type='RandomFlip', prob=0., direction='horizontal'), + dict(type='RandomFlip', prob=1., direction='horizontal') + ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')] + ]) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/training', seg_map_path='annotations/training'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/validation', + seg_map_path='annotations/validation'), + pipeline=val_pipeline)) +test_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='images/test', seg_map_path='annotations/test'), + pipeline=val_pipeline)) + +val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice']) +test_evaluator = val_evaluator diff --git 
a/configs/_base_/datasets/stare.py b/configs/_base_/datasets/stare.py index e55519b595..b7545dc623 100644 --- a/configs/_base_/datasets/stare.py +++ b/configs/_base_/datasets/stare.py @@ -26,7 +26,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/datasets/vaihingen.py b/configs/_base_/datasets/vaihingen.py index 2b3fa76093..6c78994fe7 100644 --- a/configs/_base_/datasets/vaihingen.py +++ b/configs/_base_/datasets/vaihingen.py @@ -25,7 +25,7 @@ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/configs/_base_/models/fpn_poolformer_s12.py b/configs/_base_/models/fpn_poolformer_s12.py index 483d823308..b6893f6977 100644 --- a/configs/_base_/models/fpn_poolformer_s12.py +++ b/configs/_base_/models/fpn_poolformer_s12.py @@ -1,7 +1,10 @@ # model settings norm_cfg = dict(type='SyncBN', requires_grad=True) checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa -custom_imports = dict(imports='mmcls.models', allow_failed_imports=False) +# TODO: delete custom_imports after mmcls supports auto import +# please install mmcls>=1.0 +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) data_preprocessor = dict( type='SegDataPreProcessor', mean=[123.675, 116.28, 103.53], diff --git a/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py index fc132a698f..d2211b66a3 100644 --- 
a/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py +++ b/configs/mask2former/mask2former_r50_8xb2-90k_cityscapes-512x1024.py @@ -1,7 +1,5 @@ _base_ = ['../_base_/default_runtime.py', '../_base_/datasets/cityscapes.py'] -custom_imports = dict(imports='mmdet.models', allow_failed_imports=False) - crop_size = (512, 1024) data_preprocessor = dict( type='SegDataPreProcessor', diff --git a/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py index 4e4036db3a..b8b1d6cfff 100644 --- a/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py +++ b/configs/mask2former/mask2former_swin-b-in1k-384x384-pre_8xb2-160k_ade20k-640x640.py @@ -3,7 +3,6 @@ ] pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa -custom_imports = dict(imports='mmdet.models', allow_failed_imports=False) crop_size = (640, 640) data_preprocessor = dict( diff --git a/configs/mobilenet_v2/README.md b/configs/mobilenet_v2/README.md index c1010044a9..30f1fe3ce2 100644 --- a/configs/mobilenet_v2/README.md +++ b/configs/mobilenet_v2/README.md @@ -39,12 +39,12 @@ The MobileNetV2 architecture is based on an inverted residual structure where th ### Cityscapes -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 61.54 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | -| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | -| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | -| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 71.19 | 73.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024_20230224_185436.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/mobilenet_v2/mobilenet-v2-d8_deeplabv3plus_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | ### ADE20K diff --git 
a/configs/mobilenet_v2/mobilenet_v2.yml b/configs/mobilenet_v2/mobilenet_v2.yml index 69d73d568a..6d87401ce8 100644 --- a/configs/mobilenet_v2/mobilenet_v2.yml +++ b/configs/mobilenet_v2/mobilenet_v2.yml @@ -17,9 +17,10 @@ Models: - Task: Semantic Segmentation Dataset: Cityscapes Metrics: - mIoU: 61.54 + mIoU: 71.19 + mIoU(ms+flip): 73.34 Config: configs/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024/mobilenet-v2-d8_fcn_4xb2-80k_cityscapes-512x1024-20230224_185436-13fef4ea.pth - Name: mobilenet-v2-d8_pspnet_4xb2-80k_cityscapes-512x1024 In Collection: PSPNet Metadata: diff --git a/configs/ocrnet/README.md b/configs/ocrnet/README.md index 5cbfbabfce..4bd9c7d0b0 100644 --- a/configs/ocrnet/README.md +++ b/configs/ocrnet/README.md @@ -46,17 +46,17 @@ In this paper, we address the problem of semantic segmentation and focus on the #### HRNet backbone -| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | -| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.pyy) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | -| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | -| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | -| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 76.61 | 78.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-40k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-80k_cityscapes-512x1024.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-80k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18s_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr18_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 
160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/ocrnet/ocrnet_hr48_4xb2-160k_cityscapes-512x1024.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | #### ResNet backbone diff --git a/configs/ocrnet/ocrnet.yml b/configs/ocrnet/ocrnet.yml index a81aec2c75..20002e8864 100644 --- a/configs/ocrnet/ocrnet.yml +++ b/configs/ocrnet/ocrnet.yml @@ -33,10 +33,10 @@ Models: - Task: Semantic Segmentation Dataset: Cityscapes Metrics: - mIoU: 74.3 - mIoU(ms+flip): 75.95 + mIoU: 76.61 + mIoU(ms+flip): 78.01 Config: configs/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024.py - Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024/ocrnet_hr18s_4xb2-40k_cityscapes-512x1024_20230227_145026-6c052a14.pth - Name: ocrnet_hr18_4xb2-40k_cityscapes-512x1024 In Collection: OCRNet Metadata: diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb index 89d6e52613..1d92342ae6 100644 --- a/demo/MMSegmentation_Tutorial.ipynb +++ b/demo/MMSegmentation_Tutorial.ipynb @@ -460,12 +460,8 @@ "outputs": [], "source": [ "from mmengine.runner import Runner\n", - "from mmseg.utils import register_all_modules\n", "\n", - "# register all modules in mmseg into the registries\n", - "# do not init the default scope here because it will be init in the runner\n", - "register_all_modules(init_default_scope=False)\n", - "runner = Runner.from_cfg(cfg)\n" + "runner = Runner.from_cfg(cfg)" ] }, { @@ -523,7 +519,7 @@ 
"provenance": [] }, "kernelspec": { - "display_name": "Python 3.8.5 ('tensorflow')", + "display_name": "Python 3.10.6 ('pt1.12')", "language": "python", "name": "python3" }, @@ -537,7 +533,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.6" }, "pycharm": { "stem_cell": { @@ -550,7 +546,7 @@ }, "vscode": { "interpreter": { - "hash": "20d4b83e0c8b3730b580c42434163d64f4b735d580303a8fade7c849d4d29eba" + "hash": "0442e67aee3d9cbb788fa6e86d60c4ffa94ad7f1943c65abfecb99a6f4696c58" } } }, diff --git a/demo/image_demo.py b/demo/image_demo.py index fe11b7693a..231aacb9dd 100644 --- a/demo/image_demo.py +++ b/demo/image_demo.py @@ -4,7 +4,6 @@ from mmengine.model import revert_sync_batchnorm from mmseg.apis import inference_model, init_model, show_result_pyplot -from mmseg.utils import register_all_modules def main(): @@ -24,8 +23,6 @@ def main(): '--title', default='result', help='The image identifier.') args = parser.parse_args() - register_all_modules() - # build the model from a config file and a checkpoint file model = init_model(args.config, args.checkpoint, device=args.device) if args.device == 'cpu': diff --git a/demo/image_demo_with_inferencer.py b/demo/image_demo_with_inferencer.py new file mode 100644 index 0000000000..26bf0f257c --- /dev/null +++ b/demo/image_demo_with_inferencer.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from argparse import ArgumentParser + +from mmseg.apis import MMSegInferencer + + +def main(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('model', help='Config file') + parser.add_argument('--checkpoint', default=None, help='Checkpoint file') + parser.add_argument( + '--out-dir', default='', help='Path to save result file') + parser.add_argument( + '--show', + action='store_true', + default=False, + help='Whether to display the drawn image.') + parser.add_argument( + '--dataset-name', + default='cityscapes', + help='Color palette used for segmentation map') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. In (0, 1] range.') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + mmseg_inferencer = MMSegInferencer( + args.model, + args.checkpoint, + dataset_name=args.dataset_name, + device=args.device) + + # test a single image + mmseg_inferencer( + args.img, show=args.show, out_dir=args.out_dir, opacity=args.opacity) + + +if __name__ == '__main__': + main() diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb index f05a947483..3a29a96466 100644 --- a/demo/inference_demo.ipynb +++ b/demo/inference_demo.ipynb @@ -2,9 +2,28 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mkdir: ../checkpoints: File exists\n", + "--2023-02-23 19:23:01-- https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth\n", + "正在解析主机 download.openmmlab.com (download.openmmlab.com)... 
116.0.89.205, 116.0.89.209, 116.0.89.207, ...\n", + "正在连接 download.openmmlab.com (download.openmmlab.com)|116.0.89.205|:443... 已连接。\n", + "已发出 HTTP 请求,正在等待回应... 200 OK\n", + "长度:196205945 (187M) [application/octet-stream]\n", + "正在保存至: “../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth.3”\n", + "\n", + "pspnet_r50-d8_512x1 100%[===================>] 187.12M 861KB/s 用时 2m 56s \n", + "\n", + "2023-02-23 19:25:57 (1.06 MB/s) - 已保存 “../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth.3” [196205945/196205945])\n", + "\n" + ] + } + ], "source": [ "!mkdir ../checkpoints\n", "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P ../checkpoints" @@ -12,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "pycharm": { "is_executing": true @@ -24,14 +43,12 @@ "import mmcv\n", "import matplotlib.pyplot as plt\n", "from mmengine.model.utils import revert_sync_batchnorm\n", - "from mmseg.apis import init_model, inference_model, show_result_pyplot\n", - "from mmseg.utils import register_all_modules\n", - "register_all_modules()" + "from mmseg.apis import init_model, inference_model, show_result_pyplot" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "pycharm": { "is_executing": true @@ -39,15 +56,33 @@ }, "outputs": [], "source": [ - "config_file = '../configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n", + "config_file = '../configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'\n", "checkpoint_file = '../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/xxc/Desktop/pjlab/mmsegv2/mmseg/models/builder.py:36: UserWarning: ``build_loss`` would be deprecated soon, please use ``mmseg.registry.MODELS.build()`` \n", + " warnings.warn('``build_loss`` would be deprecated soon, please use '\n", + "/Users/xxc/Desktop/pjlab/mmsegv2/mmseg/models/losses/cross_entropy_loss.py:235: UserWarning: Default ``avg_non_ignore`` is False, if you would like to ignore the certain label and average loss over non-ignore labels, which is the same with PyTorch official cross_entropy, set ``avg_non_ignore=True``.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loads checkpoint by local backend from path: ../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth\n" + ] + } + ], "source": [ "# build the model from a config file and a checkpoint file\n", "model = init_model(config_file, checkpoint_file, device='cuda:0')" @@ -55,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -68,9 +103,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/anaconda3/envs/pt1.13/lib/python3.10/site-packages/mmengine/visualization/visualizer.py:163: UserWarning: `Visualizer` backend is not initialized because save_dir is None.\n", + " warnings.warn('`Visualizer` backend is not initialized '\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# show the results\n", "vis_result = show_result_pyplot(model, img, result)\n", @@ -87,7 +151,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4 ('pt1.11-v2')", + "display_name": "pt1.13", "language": "python", "name": "python3" }, @@ -101,7 +165,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.10.9" }, "pycharm": { "stem_cell": { @@ -114,7 +178,7 @@ }, "vscode": { "interpreter": { - "hash": "fdab7187f8cbd4ce42bbf864ddb4c4693e7329271a15a7fa96e4bdb82b9302c9" + "hash": "f61d5b8fecdd960739697f6c2860080d7b76a5be5d896cb034bdb275ab3ddda0" } } }, diff --git a/demo/video_demo.py b/demo/video_demo.py index 3eb326b7af..7e6f3d605c 100644 --- a/demo/video_demo.py +++ b/demo/video_demo.py @@ -6,7 +6,6 @@ from mmseg.apis import inference_model, init_model from mmseg.apis.inference import show_result_pyplot -from mmseg.utils import register_all_modules def main(): @@ -53,8 +52,6 @@ def main(): assert args.show or args.output_file, \ 'At least one output should be enabled.' 
- register_all_modules() - # build the model from a config file and a checkpoint file model = init_model(args.config, args.checkpoint, device=args.device) if args.device == 'cpu': diff --git a/docker/serve/Dockerfile b/docker/serve/Dockerfile index 5ae1eb607d..cf127ddbe6 100644 --- a/docker/serve/Dockerfile +++ b/docker/serve/Dockerfile @@ -4,7 +4,7 @@ ARG CUDNN="8" FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel ARG MMCV="2.0.0rc4" -ARG MMSEG="1.0.0rc5" +ARG MMSEG="1.0.0rc6" ENV PYTHONUNBUFFERED TRUE diff --git a/docs/en/advanced_guides/add_dataset.md b/docs/en/advanced_guides/add_datasets.md similarity index 99% rename from docs/en/advanced_guides/add_dataset.md rename to docs/en/advanced_guides/add_datasets.md index 4149014e64..f33f3d32c6 100644 --- a/docs/en/advanced_guides/add_dataset.md +++ b/docs/en/advanced_guides/add_datasets.md @@ -1,4 +1,4 @@ -# Add New Datasets +# \[WIP\] Add New Datasets ## Customize datasets by reorganizing data diff --git a/docs/en/advanced_guides/add_metrics.md b/docs/en/advanced_guides/add_metrics.md new file mode 100644 index 0000000000..0a25a81fc4 --- /dev/null +++ b/docs/en/advanced_guides/add_metrics.md @@ -0,0 +1 @@ +# Add New Metrics diff --git a/docs/en/advanced_guides/add_transform.md b/docs/en/advanced_guides/add_transform.md deleted file mode 100644 index 69de9d317b..0000000000 --- a/docs/en/advanced_guides/add_transform.md +++ /dev/null @@ -1,37 +0,0 @@ -# Adding New Data Transforms - -1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and return a dict. - - ```python - from mmseg.datasets import TRANSFORMS - @TRANSFORMS.register_module() - class MyTransform: - def transform(self, results): - results['dummy'] = True - return results - ``` - -2. Import the new class. - - ```python - from .my_pipeline import MyTransform - ``` - -3. Use it in config files. 
- - ```python - crop_size = (512, 1024) - train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='RandomResize', - scale=(2048, 1024), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='MyTransform'), - dict(type='PackSegInputs'), - ] - ``` diff --git a/docs/en/advanced_guides/add_transforms.md b/docs/en/advanced_guides/add_transforms.md new file mode 100644 index 0000000000..ca336ce046 --- /dev/null +++ b/docs/en/advanced_guides/add_transforms.md @@ -0,0 +1,52 @@ +# Adding New Data Transforms + +## Customization data transformation + +The customized data transformation must inherit from `BaseTransform` and implement the `transform` function. +Here we use a simple flipping transformation as an example: + +```python +import random +import mmcv +from mmcv.transforms import BaseTransform, TRANSFORMS + +@TRANSFORMS.register_module() +class MyFlip(BaseTransform): + def __init__(self, direction: str): + super().__init__() + self.direction = direction + + def transform(self, results: dict) -> dict: + img = results['img'] + results['img'] = mmcv.imflip(img, direction=self.direction) + return results +``` + +Moreover, import the new class. + +```python +from .my_pipeline import MyFlip +``` + +Thus, we can instantiate a `MyFlip` object and use it to process the data dict. + +```python +import numpy as np + +transform = MyFlip(direction='horizontal') +data_dict = {'img': np.random.rand(224, 224, 3)} +data_dict = transform(data_dict) +processed_img = data_dict['img'] +``` + +Or, we can use the `MyFlip` transformation in the data pipeline in our config file. + +```python +pipeline = [ + ... + dict(type='MyFlip', direction='horizontal'), + ... +] +``` + +Note that if you want to use `MyFlip` in config, you must ensure the file containing `MyFlip` is imported during runtime.
diff --git a/docs/en/advanced_guides/customize_runtime.md b/docs/en/advanced_guides/customize_runtime.md index f138c226fd..33281bfe4a 100644 --- a/docs/en/advanced_guides/customize_runtime.md +++ b/docs/en/advanced_guides/customize_runtime.md @@ -1,245 +1,168 @@ # Customize Runtime Settings -## Customize optimization settings +## Customize hooks -### Customize optimizer supported by Pytorch +### Step 1: Implement a new hook -We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files. -For example, if you want to use `ADAM` (note that the performance could drop a lot), the modification could be as the following. +MMEngine has implemented commonly used [hooks](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md) for training and testing. +When users have requirements for customization, they can follow the examples below. +For example, if some hyper-parameter of the model needs to be changed during model training, we can implement a new hook for it: ```python -optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) -``` - -To modify the learning rate of the model, the users only need to modify the `lr` in the config of optimizer. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch. - -### Customize self-implemented optimizer +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional, Sequence -#### 1. Define a new optimizer +from mmengine.hooks import Hook +from mmengine.model import is_model_wrapper -A customized optimizer could be defined as following. - -Assume you want to add a optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`. -You need to create a new directory named `mmseg/core/optimizer`.
-And then implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`: - -```python -from .registry import OPTIMIZERS -from torch.optim import Optimizer +from mmseg.registry import HOOKS -@OPTIMIZERS.register_module() -class MyOptimizer(Optimizer): +@HOOKS.register_module() +class NewHook(Hook): + """Docstring for NewHook. + """ - def __init__(self, a, b, c) + def __init__(self, a: int, b: int) -> None: + self.a = a + self.b = b + def before_train_iter(self, + runner, + batch_idx: int, + data_batch: Optional[Sequence[dict]] = None) -> None: + cur_iter = runner.iter + # unwrap the model when it is in a wrapper + model = (runner.model.module if is_model_wrapper(runner.model) + else runner.model) + model.hyper_parameter = self.a * cur_iter + self.b ``` -#### 2. Add the optimizer to registry - -To find the above module defined above, this module should be imported into the main namespace at first. There are two options to achieve it. +### Step 2: Import a new hook -- Modify `mmseg/core/optimizer/__init__.py` to import it. +The module defined above needs to be imported into the main namespace first to ensure it is registered. +Assuming `NewHook` is implemented in `mmseg/engine/hooks/new_hook.py`, there are two ways to import it: - The newly defined module should be imported in `mmseg/core/optimizer/__init__.py` so that the registry will - find the new module and add it: +- Import it by modifying `mmseg/engine/hooks/__init__.py`. + Modules should be imported in `mmseg/engine/hooks/__init__.py` so that these new modules can be found and added by the registry. ```python -from .my_optimizer import MyOptimizer +from .new_hook import NewHook + +__all__ = [..., 'NewHook'] ``` -- Use `custom_imports` in the config to manually import it +- Import it manually by `custom_imports` in config file.
```python -custom_imports = dict(imports=['mmseg.core.optimizer.my_optimizer'], allow_failed_imports=False) +custom_imports = dict(imports=['mmseg.engine.hooks.new_hook'], allow_failed_imports=False) ``` -The module `mmseg.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered. -Note that only the package containing the class `MyOptimizer` should be imported. -`mmseg.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly. - -Actually users can use a totally different file directory structure using this importing method, as long as the module root can be located in `PYTHONPATH`. - -#### 3. Specify the optimizer in the config file +### Step 3: Modify config file -Then you can use `MyOptimizer` in `optimizer` field of config files. -In the configs, the optimizers are defined by the field `optimizer` like the following: +Users can set and use customized hooks in training and testing by following the method below. +The execution priority of hooks at the same site of `Runner` is described [here](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/hook.md#built-in-hooks). +The default priority of a customized hook is `NORMAL`. ```python -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +custom_hooks = [ + dict(type='NewHook', a=a_value, b=b_value, priority='ABOVE_NORMAL') +] ``` -To use your own optimizer, the field can be changed to - -```python -optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) -``` +## Customize optimizer -### Customize optimizer constructor +### Step 1: Implement a new optimizer -Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers. -The users can do those fine-grained parameter tuning through customizing optimizer constructor. +We recommend implementing the customized optimizer in `mmseg/engine/optimizers/my_optimizer.py`.
Here is an example of a new optimizer `MyOptimizer` which has parameters `a`, `b` and `c`: ```python -from mmcv.utils import build_from_cfg - -from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS -from mmseg.utils import get_root_logger -from .my_optimizer import MyOptimizer - - -@OPTIMIZER_BUILDERS.register_module() -class MyOptimizerConstructor(object): +from mmseg.registry import OPTIMIZERS +from torch.optim import Optimizer - def __init__(self, optim_wrapper_cfg, paramwise_cfg=None): - def __call__(self, model): - - return my_optimizer +@OPTIMIZERS.register_module() +class MyOptimizer(Optimizer): + def __init__(self, a, b, c) ``` -The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11), which could also serve as a template for new optimizer constructor. - -### Additional settings - -Tricks not implemented by the optimizer should be implemented through optimizer constructor (e.g., set parameter-wise learning rates) or hooks. We list some common settings that could stabilize the training or accelerate the training. Feel free to create PR, issue for more settings. - -- __Use gradient clip to stabilize training__: - Some models need gradient clip to clip the gradients to stabilize the training process. An example is as below: - - ```python - optimizer_config = dict( - _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) - ``` - - If your config inherits the base config which already sets the `optimizer_config`, you might need `_delete_=True` to override the unnecessary settings. See the [config documentation](https://mmsegmentation.readthedocs.io/en/latest/config.html) for more details. - -- __Use momentum schedule to accelerate model convergence__: - We support momentum scheduler to modify model's momentum according to learning rate, which could make the model converge in a faster way. 
- Momentum scheduler is usually used with LR scheduler, for example, the following config is used in 3D detection to accelerate convergence. - For more details, please refer to the implementation of [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327) and [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130). - - ```python - lr_config = dict( - policy='cyclic', - target_ratio=(10, 1e-4), - cyclic_times=1, - step_ratio_up=0.4, - ) - momentum_config = dict( - policy='cyclic', - target_ratio=(0.85 / 0.95, 1), - cyclic_times=1, - step_ratio_up=0.4, - ) - ``` - -## Customize training schedules - -By default we use step learning rate with 40k/80k schedule, this calls [`PolyLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/826d3a7b68596c824fa1e2cb89b6ac274f52179c/mmcv/runner/hooks/lr_updater.py#L196) in MMCV. -We support many other learning rate schedule [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), such as `CosineAnnealing` and `Poly` schedule. Here are some examples - -- Step schedule: - - ```python - lr_config = dict(policy='step', step=[9, 10]) - ``` - -- ConsineAnnealing schedule: - - ```python - lr_config = dict( - policy='CosineAnnealing', - warmup='linear', - warmup_iters=1000, - warmup_ratio=1.0 / 10, - min_lr_ratio=1e-5) - ``` +### Step 2: Import a new optimizer -## Customize workflow +The module which is defined above needs to be imported into main namespace first to ensure being registered. +We assume `MyOptimizer` is implemented in `mmseg/engine/optimizers/my_optimizer.py`, there are two ways to import it: -Workflow is a list of (phase, epochs) to specify the running order and epochs. -By default it is set to be +- Import it by modifying `mmseg/engine/optimizers/__init__.py`. 
+ Modules should be imported in `mmseg/engine/optimizers/__init__.py` thus these new modules can be found and added by registry. ```python -workflow = [('train', 1)] +from .my_optimizer import MyOptimizer ``` -which means running 1 epoch for training. -Sometimes user may want to check some metrics (e.g. loss, accuracy) about the model on the validate set. -In such case, we can set the workflow as +- Import it manually by `custom_imports` in config file. ```python -[('train', 1), ('val', 1)] +custom_imports = dict(imports=['mmseg.engine.optimizers.my_optimizer'], allow_failed_imports=False) ``` -so that 1 epoch for training and 1 epoch for validation will be run iteratively. - -:::{note} +### Step 3: Modify config file -1. The parameters of model will not be updated during val epoch. -2. Keyword `total_epochs` in the config only controls the number of training epochs and will not affect the validation workflow. -3. Workflows `[('train', 1), ('val', 1)]` and `[('train', 1)]` will not change the behavior of `EvalHook` because `EvalHook` is called by `after_train_epoch` and validation workflow only affect hooks that are called through `after_val_epoch`. Therefore, the only difference between `[('train', 1), ('val', 1)]` and `[('train', 1)]` is that the runner will calculate losses on validation set after each training epoch. 
+Then modify the `optimizer` field in `optim_wrapper` of the config file. To use the customized `MyOptimizer`, it can be modified as: -::: +```python +optim_wrapper = dict(type='OptimWrapper', + optimizer=dict(type='MyOptimizer', + a=a_value, b=b_value, c=c_value), + clip_grad=None) +``` -## Customize hooks +## Customize optimizer constructor -### Use hooks implemented in MMCV +### Step 1: Implement a new optimizer constructor -If the hook is already implemented in MMCV, you can directly modify the config to use the hook as below +An optimizer constructor is used to create the optimizer and optimizer wrapper for model training, and it supports powerful functions such as specifying learning rate and weight decay for different model layers. +Here is an example for a customized optimizer constructor. ```python -custom_hooks = [ - dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL') -] -``` +from mmengine.optim import DefaultOptimWrapperConstructor +from mmseg.registry import OPTIM_WRAPPER_CONSTRUCTORS -### Modify default runtime hooks +@OPTIM_WRAPPER_CONSTRUCTORS.register_module() +class MyOptimizerConstructor(DefaultOptimWrapperConstructor): + def __init__(self, optim_wrapper_cfg, paramwise_cfg=None): -There are some common hooks that are not registered through `custom_hooks`, they are + def __call__(self, model): -- log_config -- checkpoint_config -- evaluation -- lr_config -- optimizer_config -- momentum_config + return my_optimizer +``` -In those hooks, only the logger hook has the `VERY_LOW` priority, others' priority are `NORMAL`. -The above-mentioned tutorials already covers how to modify `optimizer_config`, `momentum_config`, and `lr_config`. -Here we reveals how what we can do with `log_config`, `checkpoint_config`, and `evaluation`. +The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py#L19).
+It can also be used as the base class of a new optimizer constructor. -#### Checkpoint config +### Step 2: Import a new optimizer constructor -The MMCV runner will use `checkpoint_config` to initialize [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9). +The module defined above needs to be imported into the main namespace first to ensure that it is registered. +Assuming `MyOptimizerConstructor` is implemented in `mmseg/engine/optimizers/my_optimizer_constructor.py`, there are two ways to import it: + +- Import it by modifying `mmseg/engine/optimizers/__init__.py`. + Modules should be imported in `mmseg/engine/optimizers/__init__.py` so that these new modules can be found and added by the registry. ```python -checkpoint_config = dict(interval=1) +from .my_optimizer_constructor import MyOptimizerConstructor ``` -The users could set `max_keep_ckpts` to only save only small number of checkpoints or decide whether to store state dict of optimizer by `save_optimizer`. More details of the arguments are [here](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook) - -#### Log config - -The `log_config` wraps multiple logger hooks and enables to set intervals. Now MMCV supports `WandbLoggerHook`, `MlflowLoggerHook`, and `TensorboardLoggerHook`. -The detail usages can be found in the [doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook). +- Import it manually by `custom_imports` in config file.
```python -log_config = dict( - interval=50, - hooks=[ - dict(type='TextLoggerHook'), - dict(type='TensorboardLoggerHook') - ]) +custom_imports = dict(imports=['mmseg.engine.optimizers.my_optimizer_constructor'], allow_failed_imports=False) ``` -#### Evaluation config +### Step 3: Modify config file -The config of `evaluation` will be used to initialize the [`EvalHook`](https://github.com/open-mmlab/mmsegmentation/blob/e3f6f655d69b777341aec2fe8829871cc0beadcb/mmseg/core/evaluation/eval_hooks.py#L7). -Except the key `interval`, other arguments such as `metric` will be passed to the `dataset.evaluate()` +Then it needs to modify `constructor` in `optim_wrapper` of config file, if users want to use customized `MyOptimizerConstructor`, it can be modified as: ```python -evaluation = dict(interval=1, metric='mIoU') +optim_wrapper = dict(type='OptimWrapper', + constructor='MyOptimizerConstructor', + clip_grad=None) ``` diff --git a/docs/en/advanced_guides/datasets.md b/docs/en/advanced_guides/datasets.md index 733e2a26d9..a1b8044b3d 100644 --- a/docs/en/advanced_guides/datasets.md +++ b/docs/en/advanced_guides/datasets.md @@ -15,8 +15,8 @@ Instantiate Cityscapes training dataset: ```python from mmseg.datasets import CityscapesDataset -from mmseg.utils import register_all_modules -register_all_modules() +from mmengine.registry import init_default_scope +init_default_scope('mmseg') data_root = 'data/cityscapes/' data_prefix=dict(img_path='leftImg8bit/train', seg_map_path='gtFine/train') diff --git a/docs/en/advanced_guides/evaluation.md b/docs/en/advanced_guides/evaluation.md index 55728281a9..ee5a927ff7 100644 --- a/docs/en/advanced_guides/evaluation.md +++ b/docs/en/advanced_guides/evaluation.md @@ -81,7 +81,7 @@ The arguments of the constructor: - `process` method processes one batch of data and data_samples. - `compute_metrics` method computes the metrics from processed results. 
-#### IoUMetric.process +### IoUMetric.process Parameters: @@ -92,7 +92,7 @@ Returns: This method doesn't have returns since the processed results would be stored in `self.results`, which will be used to compute the metrics when all batches have been processed. -#### IoUMetric.compute_metrics +### IoUMetric.compute_metrics Parameters: diff --git a/docs/en/advanced_guides/index.rst b/docs/en/advanced_guides/index.rst index 1cae420c1c..53ef8c5e73 100644 --- a/docs/en/advanced_guides/index.rst +++ b/docs/en/advanced_guides/index.rst @@ -19,7 +19,7 @@ Component Customization .. toctree:: :maxdepth: 1 - add_modules.md + add_models.md add_datasets.md add_transforms.md add_metrics.md diff --git a/docs/en/advanced_guides/training_tricks.md b/docs/en/advanced_guides/training_tricks.md index 6c43230c71..8fa89131d5 100644 --- a/docs/en/advanced_guides/training_tricks.md +++ b/docs/en/advanced_guides/training_tricks.md @@ -1,4 +1,4 @@ -# Training Tricks +# \[WIP\] Training Tricks MMSegmentation support following training tricks out of box. diff --git a/docs/en/advanced_guides/transforms.md b/docs/en/advanced_guides/transforms.md index d42d61a9ef..e0c4155b57 100644 --- a/docs/en/advanced_guides/transforms.md +++ b/docs/en/advanced_guides/transforms.md @@ -6,7 +6,9 @@ The structure of this guide is as follows: - [Data Transforms](#data-transforms) - [Design of Data pipelines](#design-of-data-pipelines) - - [Customization data transformation](#customization-data-transformation) + - [Data loading](#data-loading) + - [Pre-processing](#pre-processing) + - [Formatting](#formatting) ## Design of Data pipelines @@ -125,48 +127,3 @@ The position of random contrast is in second or second to last(mode 0 or 1 below - add: `inputs`, `data_sample` - remove: keys specified by `meta_keys` (merged into the metainfo of data_sample), all other keys - -## Customization data transformation - -The customized data transformation must inherited from `BaseTransform` and implement `transform` function. 
-Here we use a simple flipping transformation as example: - -```python -import random -import mmcv -from mmcv.transforms import BaseTransform, TRANSFORMS - -@TRANSFORMS.register_module() -class MyFlip(BaseTransform): - def __init__(self, direction: str): - super().__init__() - self.direction = direction - - def transform(self, results: dict) -> dict: - img = results['img'] - results['img'] = mmcv.imflip(img, direction=self.direction) - return results -``` - -Thus, we can instantiate a `MyFlip` object and use it to process the data dict. - -```python -import numpy as np - -transform = MyFlip(direction='horizontal') -data_dict = {'img': np.random.rand(224, 224, 3)} -data_dict = transform(data_dict) -processed_img = data_dict['img'] -``` - -Or, we can use `MyFlip` transformation in data pipeline in our config file. - -```python -pipeline = [ - ... - dict(type='MyFlip', direction='horizontal'), - ... -] -``` - -Note that if you want to use `MyFlip` in config, you must ensure the file containing `MyFlip` is imported during runtime. diff --git a/docs/en/api.rst b/docs/en/api.rst index 94f64313d0..2f1a25ef9d 100644 --- a/docs/en/api.rst +++ b/docs/en/api.rst @@ -11,11 +11,6 @@ datasets .. automodule:: mmseg.datasets :members: -samplers -^^^^^^^^^^ -.. automodule:: mmseg.datasets.samplers - :members: - transforms ^^^^^^^^^^^^ .. 
automodule:: mmseg.datasets.transforms @@ -35,7 +30,7 @@ optimizers :members: mmseg.evaluation --------------- +----------------- metrics ^^^^^^^^^^ diff --git a/docs/en/device/npu.md b/docs/en/device/npu.md new file mode 100644 index 0000000000..a90d6ac433 --- /dev/null +++ b/docs/en/device/npu.md @@ -0,0 +1,39 @@ +# NPU (HUAWEI Ascend) + +## Usage + +Please refer to the [building documentation of MMCV](https://mmcv.readthedocs.io/en/latest/get_started/build.html#build-mmcv-full-on-ascend-npu-machine) to install MMCV on NPU devices + +Here we use 4 NPUs on your computer to train the model with the following command: + +```shell +bash tools/dist_train.sh configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py 4 +``` + +Also, you can use only one NPU to train the model with the following command: + +```shell +python tools/train.py configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py +``` + +## Models Results + +| Model | mIoU | Config | Download | +| :-----------------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------ | +| [deeplabv3](<>) | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024_20230115_205626.json) | +| [deeplabv3plus](<>) | 79.23 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024_20230116_043450.json) | +| [hrnet](<>) | 78.1 | 
[config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_hr18_4xb2-40k_cityscapes-512x1024_20230116_215821.json) | +| [fcn](<>) | 74.15 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_r50-d8_4xb2-40k_cityscapes-512x1024_20230111_083014.json) | +| [icnet](<>) | 69.25 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/icnet_r50-d8_4xb2-80k_cityscapes-832x832_20230119_002929.json) | +| [pspnet](<>) | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024_20230114_042721.json) | +| [unet](<>) | 68.86 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024_20230129_224750.json) | +| [upernet](<>) | 77.81 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/upernet_r50_4xb2-40k_cityscapes-512x1024_20230129_014634.json) | +| [apcnet](<>) | 78.02 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024_20230209_212545.json) | +| [bisenetv1](<>) | 76.04 | 
[config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024_20230201_023946.json) | +| [bisenetv2](<>) | 72.44 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024_20230205_215606.json) | + +**Notes:** + +- If not specially marked, the results on NPU with amp are basically the same as those on the GPU with FP32. + +**All above models are provided by Huawei Ascend group.** diff --git a/docs/en/get_started.md b/docs/en/get_started.md index 313501e0d3..cf861b1fe8 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -91,10 +91,8 @@ Option (b). If you install mmsegmentation with pip, open you python interpreter ```python from mmseg.apis import inference_model, init_model, show_result_pyplot -from mmseg.utils import register_all_modules import mmcv -register_all_modules() config_file = 'pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' diff --git a/docs/en/index.rst b/docs/en/index.rst index 63cfb924c4..cdf8622f94 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -45,13 +45,17 @@ Welcome to MMSegmentation's documentation! notes/changelog.md notes/faq.md +.. toctree:: + :caption: Device Support + + device/npu.md + ..
toctree:: :caption: Switch Language switch_language.md - Indices and tables ================== diff --git a/docs/en/migration/interface.md b/docs/en/migration/interface.md index 1bc3d206e2..d75f8ec3ef 100644 --- a/docs/en/migration/interface.md +++ b/docs/en/migration/interface.md @@ -237,7 +237,7 @@ test_pipeline = [ ] img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] tta_pipeline = [ - dict(type='LoadImageFromFile', backend_args=dict(backend='local')), + dict(type='LoadImageFromFile', backend_args=None), dict( type='TestTimeAug', transforms=[ diff --git a/docs/en/migration/package.md b/docs/en/migration/package.md index 95fefe1310..c0aa1d6e31 100644 --- a/docs/en/migration/package.md +++ b/docs/en/migration/package.md @@ -82,7 +82,7 @@ Here is the changes of `mmseg.apis`: | Function | Changes | | :-------------------: | :---------------------------------------------- | | `init_segmentor` | Renamed to `init_model` | -| `inference_segmentor` | Rename to `inference_segmentor` | +| `inference_segmentor` | Rename to `inference_model` | | `show_result_pyplot` | Implemented based on `SegLocalVisualizer` | | `train_model` | Removed, use `runner.train` to train. | | `multi_gpu_test` | Removed, use `runner.test` to test. 
| diff --git a/docs/en/modelzoo_statistics.md b/docs/en/modelzoo_statistics.md new file mode 100644 index 0000000000..c8fa46d013 --- /dev/null +++ b/docs/en/modelzoo_statistics.md @@ -0,0 +1,102 @@ +# Model Zoo Statistics + +- Number of papers: 47 + + - ALGORITHM: 36 + - BACKBONE: 11 + +- Number of checkpoints: 612 + + - \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) (16 ckpts) + + - \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) (12 ckpts) + + - \[BACKBONE\] [BEiT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit) (2 ckpts) + + - \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1) (11 ckpts) + + - \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2) (4 ckpts) + + - \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) (16 ckpts) + + - \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) (2 ckpts) + + - \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext) (6 ckpts) + + - \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) (16 ckpts) + + - \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) (41 ckpts) + + - \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) (42 ckpts) + + - \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) (12 ckpts) + + - \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) (12 ckpts) + + - \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt) (1 ckpts) + + - \[ALGORITHM\] 
[EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) (4 ckpts) + + - \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) (12 ckpts) + + - \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet) (1 ckpts) + + - \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn) (12 ckpts) + + - \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) (1 ckpts) + + - \[ALGORITHM\] [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) (41 ckpts) + + - \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) (16 ckpts) + + - \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) (37 ckpts) + + - \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet) (12 ckpts) + + - \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet) (16 ckpts) + + - \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet) (7 ckpts) + + - \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae) (1 ckpts) + + - \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mask2former) (13 ckpts) + + - \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/maskformer) (4 ckpts) + + - \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) (8 ckpts) + + - \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) (4 ckpts) + + - \[ALGORITHM\] [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) (16 ckpts) + + - \[ALGORITHM\] 
[OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) (24 ckpts) + + - \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) (4 ckpts) + + - \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/poolformer) (5 ckpts) + + - \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) (16 ckpts) + + - \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) (54 ckpts) + + - \[BACKBONE\] [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) (8 ckpts) + + - \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer) (13 ckpts) + + - \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter) (5 ckpts) + + - \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) (4 ckpts) + + - \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr) (7 ckpts) + + - \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc) (4 ckpts) + + - \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin) (6 ckpts) + + - \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins) (12 ckpts) + + - \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet) (25 ckpts) + + - \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) (16 ckpts) + + - \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit) (11 ckpts) diff --git a/docs/en/notes/changelog.md b/docs/en/notes/changelog.md index 963cd6945b..518bfb314e 100644 --- a/docs/en/notes/changelog.md +++ b/docs/en/notes/changelog.md @@ -1,5 
+1,40 @@ # Changelog of v1.x +## v1.0.0rc6(03/03/2023) + +### Highlights + +- Support MMSegInferencer ([#2413](https://github.com/open-mmlab/mmsegmentation/pull/2413), [#2658](https://github.com/open-mmlab/mmsegmentation/pull/2658)) +- Support REFUGE dataset ([#2554](https://github.com/open-mmlab/mmsegmentation/pull/2554)) + +### Features + +- Support auto import modules from registry ([#2481](https://github.com/open-mmlab/mmsegmentation/pull/2481)) +- Replace numpy ascontiguousarray with torch contiguous to speed-up ([#2604](https://github.com/open-mmlab/mmsegmentation/pull/2604)) +- Add browse_dataset.py tool ([#2649](https://github.com/open-mmlab/mmsegmentation/pull/2649)) + +### Bug fix + +- Rename and Fix bug of projects HieraSeg ([#2565](https://github.com/open-mmlab/mmsegmentation/pull/2565)) +- Add out_channels in `CascadeEncoderDecoder` and update OCRNet and MobileNet v2 results ([#2656](https://github.com/open-mmlab/mmsegmentation/pull/2656)) + +### Documentation + +- Add dataflow documentation of Chinese version ([#2652](https://github.com/open-mmlab/mmsegmentation/pull/2652)) +- Add customized runtime documentation of English version ([#2533](https://github.com/open-mmlab/mmsegmentation/pull/2533)) +- Add documentation for visualizing feature map using wandb backend ([#2557](https://github.com/open-mmlab/mmsegmentation/pull/2557)) +- Add documentation for benchmark results on NPU (HUAWEI Ascend) ([#2569](https://github.com/open-mmlab/mmsegmentation/pull/2569), [#2596](https://github.com/open-mmlab/mmsegmentation/pull/2596), [#2610](https://github.com/open-mmlab/mmsegmentation/pull/2610)) +- Fix api name error in the migration doc ([#2601](https://github.com/open-mmlab/mmsegmentation/pull/2601)) +- Refine projects documentation ([#2586](https://github.com/open-mmlab/mmsegmentation/pull/2586)) +- Refine MMSegmentation documentation ([#2668](https://github.com/open-mmlab/mmsegmentation/pull/2668), 
[#2659](https://github.com/open-mmlab/mmsegmentation/pull/2659)) + +### New Contributors + +- @zccjjj made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2548 +- @liuruiqiang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2554 +- @wangjiangben-hw made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2569 +- @jinxianwei made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/2557 + ## v1.0.0rc5(02/01/2023) ### Bug fix diff --git a/docs/en/notes/faq.md b/docs/en/notes/faq.md index bb09873cf0..fe5cac3834 100644 --- a/docs/en/notes/faq.md +++ b/docs/en/notes/faq.md @@ -1,4 +1,4 @@ -# Frequently Asked Questions (FAQ) +# \[WIP\] Frequently Asked Questions (FAQ) We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. If the contents here do not cover your issue, please create an issue using the [provided templates](https://github.com/open-mmlab/mmsegmentation/blob/master/.github/ISSUE_TEMPLATE/error-report.md/) and make sure you fill in all required information in the template. @@ -8,16 +8,21 @@ The compatible MMSegmentation, MMCV and MMEngine versions are as below. 
Please i | MMSegmentation version | MMCV version | MMEngine version | MMClassification (optional) version | MMDetection (optional) version | | :--------------------: | :----------------------------: | :---------------: | :---------------------------------: | :----------------------------: | -| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | -| 1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | -| 1.0.0rc5 | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>3.0.0rc5 | +| dev-1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 | +| 1.x branch | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 | +| 1.0.0rc6 | mmcv >= 2.0.0rc4 | MMEngine >= 0.5.0 | mmcls>=1.0.0rc0 | mmdet >= 3.0.0rc6 | +| 1.0.0rc5 | mmcv >= 2.0.0rc4 | MMEngine >= 0.2.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc6 | | 1.0.0rc4 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4, \<=3.0.0rc5 | | 1.0.0rc3 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5 | | 1.0.0rc2 | mmcv == 2.0.0rc3 | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | mmdet>=3.0.0rc4 \<=3.0.0rc5 | | 1.0.0rc1 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required | | 1.0.0rc0 | mmcv >= 2.0.0rc1, \<=2.0.0rc3> | MMEngine >= 0.1.0 | mmcls>=1.0.0rc0 | Not required | -Notes: To install MMSegmentation 0.x and master branch, please refer to [the faq 0.x document](https://mmsegmentation.readthedocs.io/en/latest/faq.html#installation) to check compatible versions of MMCV. +Notes: + +- MMClassification and MMDetection are optional for MMSegmentation. If they are not installed, `ConvNeXt` (requires MMClassification) and MaskFormer, Mask2Former (require MMDetection) cannot be used. We recommend installing them from source.
Please refer to [MMClassification](https://github.com/open-mmlab/mmclassification) and [MMDetection](https://github.com/open-mmlab/mmdetection) for more details about their installation. + +- To install MMSegmentation 0.x and master branch, please refer to [the faq 0.x document](https://mmsegmentation.readthedocs.io/en/latest/faq.html#installation) to check compatible versions of MMCV. ## How to know the number of GPUs needed to train the model diff --git a/docs/en/user_guides/2_dataset_prepare.md b/docs/en/user_guides/2_dataset_prepare.md index e9c7683dc0..5d36061d89 100644 --- a/docs/en/user_guides/2_dataset_prepare.md +++ b/docs/en/user_guides/2_dataset_prepare.md @@ -145,6 +145,15 @@ mmsegmentation │ │ ├── ann_dir │ │ │ ├── train │ │ │ ├── val +│ ├── REFUGE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test ``` ### Cityscapes @@ -330,7 +339,7 @@ For Potsdam dataset, please run the following command to download and re-organiz python tools/dataset_converters/potsdam.py /path/to/potsdam ``` -In our default setting, it will generate 3,456 images for training and 2,016 images for validation. +In our default setting, it will generate 3456 images for training and 2016 images for validation. ### ISPRS Vaihingen @@ -383,7 +392,7 @@ You may need to follow the following structure for dataset preparation after dow python tools/dataset_converters/isaid.py /path/to/iSAID ``` -In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33,978 images for training and 11,644 images for validation. +In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation. ## LIP(Look Into Person) dataset @@ -436,7 +445,7 @@ cd ./RawData/Training Then create `train.txt` and `val.txt` to split dataset.
-According to TransUNet, the following is the data set division. +According to TransUnet, the following is the data set division. train.txt @@ -500,7 +509,45 @@ Then, use this command to convert synapse dataset. python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse ``` -In our default setting, it will generate 2,211 2D images for training and 1,568 2D images for validation. - Noted that MMSegmentation default evaluation metric (such as mean dice value) is calculated on 2D slice image, which is not comparable to results of 3D scan in some paper such as [TransUNet](https://arxiv.org/abs/2102.04306). + +### REFUGE + +Register in [REFUGE Challenge](https://refuge.grand-challenge.org) and download [REFUGE dataset](https://refuge.grand-challenge.org/REFUGE2Download). + +Then, unzip `REFUGE2.zip` and the contents of original datasets include: + +```none +├── REFUGE2 +│ ├── REFUGE2 +│ │ ├── Annotation-Training400.zip +│ │ ├── REFUGE-Test400.zip +│ │ ├── REFUGE-Test-GT.zip +│ │ ├── REFUGE-Training400.zip +│ │ ├── REFUGE-Validation400.zip +│ │ ├── REFUGE-Validation400-GT.zip +│ ├── __MACOSX +``` + +Please run the following command to convert REFUGE dataset: + +```shell +python tools/dataset_converters/refuge.py --raw_data_root=/path/to/refuge/REFUGE2/REFUGE2 +``` + +The script will make directory structure below: + +```none +│ ├── REFUGE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ │ │ ├── test +``` + +It includes 400 images for training, 400 images for validation and 400 images for testing which is the same as REFUGE 2018 dataset.
diff --git a/docs/en/user_guides/3_inference.md b/docs/en/user_guides/3_inference.md index 6b6f6f7f3b..c9f4e62f10 100644 --- a/docs/en/user_guides/3_inference.md +++ b/docs/en/user_guides/3_inference.md @@ -4,13 +4,132 @@ MMSegmentation provides pre-trained models for semantic segmentation in [Model Z This note will show how to use existing models to inference on given images. As for how to test existing models on standard datasets, please see this [guide](./4_train_test.md) -## Inference API - MMSegmentation provides several interfaces for users to easily use pre-trained models for inference. -- [mmseg.apis.init_model](#mmsegapisinit_model) -- [mmseg.apis.inference_model](#mmsegapisinference_model) -- [mmseg.apis.show_result_pyplot](#mmsegapisshow_result_pyplot) +- [Tutorial 3: Inference with existing models](#tutorial-3-inference-with-existing-models) + - [Inferencer](#inferencer) + - [Basic Usage](#basic-usage) + - [Initialization](#initialization) + - [Visualize prediction](#visualize-prediction) + - [List model](#list-model) + - [Inference API](#inference-api) + - [mmseg.apis.init_model](#mmsegapisinit_model) + - [mmseg.apis.inference_model](#mmsegapisinference_model) + - [mmseg.apis.show_result_pyplot](#mmsegapisshow_result_pyplot) + +## Inferencer + +We provide the most **convenient** way to use the models in MMSegmentation: `MMSegInferencer`. You can get the segmentation mask for an image with only 3 lines of code. + +### Basic Usage + +The following example shows how to use `MMSegInferencer` to perform inference on a single image. + +``` +>>> from mmseg.apis import MMSegInferencer +>>> # Load models into memory +>>> inferencer = MMSegInferencer(model='deeplabv3plus_r18-d8_4xb2-80k_cityscapes-512x1024') +>>> # Inference +>>> inferencer('demo/demo.png', show=True) +``` + +The visualization result should look like: + +
+https://user-images.githubusercontent.com/76149310/221507927-ae01e3a7-016f-4425-b966-7b19cbbe494e.png +
+ +Moreover, you can use `MMSegInferencer` to process a list of images: + +``` +# Input a list of images +>>> images = [image1, image2, ...] # image1 can be a file path or a np.ndarray +>>> inferencer(images, show=True, wait_time=0.5) # wait_time is delay time, and 0 means forever. + +# Or input image directory +>>> images = $IMAGESDIR +>>> inferencer(images, show=True, wait_time=0.5) + +# Save visualized rendering color maps and predicted results +# out_dir is the directory to save the output results, img_out_dir and pred_out_dir are subdirectories of out_dir +# to save visualized rendering color maps and predicted results +>>> inferencer(images, out_dir='outputs', img_out_dir='vis', pred_out_dir='pred') +``` + +There is an optional parameter of inferencer, `return_datasamples`, whose default value is False, and +the return value of inferencer is a `dict` by default, including 2 keys 'visualization' and 'predictions'. +If `return_datasamples=True`, inferencer will return a [`SegDataSample`](../advanced_guides/structures.md), or a list of them. + +``` +result = inferencer('demo/demo.png') +# result is a `dict` including 2 keys 'visualization' and 'predictions'. +# 'visualization' includes color segmentation map +print(result['visualization'].shape) +# (512, 683, 3) + +# 'predictions' includes segmentation mask with label indices +print(result['predictions'].shape) +# (512, 683) + +result = inferencer('demo/demo.png', return_datasamples=True) +print(type(result)) +# <class 'mmseg.structures.seg_data_sample.SegDataSample'> + +# Input a list of images +results = inferencer(images) +# The output is a list +print(type(results['visualization']), results['visualization'][0].shape) +# <class 'list'> (512, 683, 3) +print(type(results['predictions']), results['predictions'][0].shape) +# <class 'list'> (512, 683) + +results = inferencer(images, return_datasamples=True) +# <class 'list'> +print(type(results[0])) +# <class 'mmseg.structures.seg_data_sample.SegDataSample'> +``` + +### Initialization + +`MMSegInferencer` must be initialized from a `model`, which can be a model name, a `Config`, or even the path of a config file.
+The model names can be found in the models' metafiles; for example, one model name of maskformer is `maskformer_r50-d32_8xb2-160k_ade20k-512x512`. If a model name is given, the weights of the model will be downloaded automatically. Below are other input parameters: + +- weights (str, optional) - Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded + from metafile. Defaults to None. +- classes (list, optional) - Input classes for result rendering, as the prediction of segmentation + model is a segment map with label indices, `classes` is a list which includes + items corresponding to the label indices. If classes is not defined, visualizer will take `cityscapes` classes by default. Defaults to None. +- palette (list, optional) - Input palette for result rendering, which is a list of color palette + corresponding to the classes. If palette is not defined, visualizer will take `cityscapes` palette by default. Defaults to None. +- dataset_name (str, optional) - [Dataset name or alias](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/class_names.py#L302-L317). + The visualizer will use the meta information of the dataset, i.e. classes and palette, + but the `classes` and `palette` have higher priority. Defaults to None. +- device (str, optional) - Device to run inference. If None, the available device will be automatically used. Defaults to None. +- scope (str, optional) - The scope of the model. Defaults to 'mmseg'. + +### Visualize prediction + +`MMSegInferencer` supports 4 parameters for visualizing predictions; you can use them when calling the initialized inferencer: + +- show (bool) - Whether to display the image in a popup window. Defaults to False. +- wait_time (float) - The interval of show (s). Defaults to 0. +- img_out_dir (str) - Subdirectory of `out_dir`, used to save rendering color segmentation mask, so `out_dir` must be defined + if you would like to save predicted mask. Defaults to 'vis'.
+- opacity (int, float) - The transparency of segmentation mask. Defaults to 0.8. + +Examples of these parameters are in [Basic Usage](#basic-usage). + +### List model + +There is a very easy way to list all model names in MMSegmentation: + +``` +>>> from mmseg.apis import MMSegInferencer +# models is a list of model names, and they will be printed automatically +>>> models = MMSegInferencer.list_models('mmseg') +``` + +## Inference API ### mmseg.apis.init_model @@ -31,14 +150,10 @@ Example: ```python from mmseg.apis import init_model -from mmseg.utils import register_all_modules config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' -# register all modules in mmseg into the registries -register_all_modules() - # initialize model without checkpoint model = init_model(config_path) @@ -76,14 +191,11 @@ Example: ```python from mmseg.apis import init_model, inference_model -from mmseg.utils import register_all_modules config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' img_path = 'demo/demo.png' -# register all modules in mmseg into the registries -register_all_modules() model = init_model(config_path, checkpoint_path) result = inference_model(model, img_path) @@ -115,14 +227,11 @@ Example: ```python from mmseg.apis import init_model, inference_model, show_result_pyplot -from mmseg.utils import register_all_modules config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' checkpoint_path = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' img_path = 'demo/demo.png' -# register all modules in mmseg into the registries -register_all_modules() # build the model from a config file and a checkpoint file model = init_model(config_path, checkpoint_path, device='cuda:0') diff --git
a/docs/en/user_guides/deployment.md b/docs/en/user_guides/deployment.md index 036db997da..a23f4b404b 100644 --- a/docs/en/user_guides/deployment.md +++ b/docs/en/user_guides/deployment.md @@ -1,4 +1,4 @@ -# Deployment +# \[WIP\] Deployment > ## [Try the new MMDeploy to deploy your model](https://mmdeploy.readthedocs.io/) diff --git a/docs/en/user_guides/useful_tools.md b/docs/en/user_guides/useful_tools.md index 128397b804..0d8677854b 100644 --- a/docs/en/user_guides/useful_tools.md +++ b/docs/en/user_guides/useful_tools.md @@ -1,4 +1,4 @@ -# Useful Tools +# \[WIP\] Useful Tools Apart from training/testing scripts, We provide lots of useful tools under the `tools/` directory. diff --git a/docs/zh_cn/advanced_guides/add_datasets.md b/docs/zh_cn/advanced_guides/add_datasets.md index 512df8b983..4ea14934ed 100644 --- a/docs/zh_cn/advanced_guides/add_datasets.md +++ b/docs/zh_cn/advanced_guides/add_datasets.md @@ -1,4 +1,4 @@ -# 自定义数据集(待更新) +# 新增自定义数据集(待更新) ## 通过重新组织数据来定制数据集 diff --git a/docs/zh_cn/advanced_guides/add_metric.md b/docs/zh_cn/advanced_guides/add_metric.md deleted file mode 100644 index dfd94487d1..0000000000 --- a/docs/zh_cn/advanced_guides/add_metric.md +++ /dev/null @@ -1 +0,0 @@ -# 添加评测指标 diff --git a/docs/zh_cn/advanced_guides/add_metrics.md b/docs/zh_cn/advanced_guides/add_metrics.md new file mode 100644 index 0000000000..3a371e357e --- /dev/null +++ b/docs/zh_cn/advanced_guides/add_metrics.md @@ -0,0 +1 @@ +# 新增评测指标 (待更新) diff --git a/docs/zh_cn/advanced_guides/add_models.md b/docs/zh_cn/advanced_guides/add_models.md new file mode 100644 index 0000000000..3f86a0c7c6 --- /dev/null +++ b/docs/zh_cn/advanced_guides/add_models.md @@ -0,0 +1,3 @@ +# 新增模块(待更新) + +中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_models.md) diff --git a/docs/zh_cn/advanced_guides/add_modules.md b/docs/zh_cn/advanced_guides/add_modules.md deleted file mode 100644 index e20dadd242..0000000000 --- a/docs/zh_cn/advanced_guides/add_modules.md +++ /dev/null @@ -1,230 +0,0 @@ -# 
自定义模型(待更新) - -## 自定义优化器 (optimizer) - -假设您想增加一个新的叫 `MyOptimizer` 的优化器,它的参数分别为 `a`, `b`, 和 `c`。 -您首先需要在一个文件里实现这个新的优化器,例如在 `mmseg/core/optimizer/my_optimizer.py` 里面: - -```python -from mmcv.runner import OPTIMIZERS -from torch.optim import Optimizer - - -@OPTIMIZERS.register_module -class MyOptimizer(Optimizer): - - def __init__(self, a, b, c) - -``` - -然后增加这个模块到 `mmseg/core/optimizer/__init__.py` 里面,这样注册器 (registry) 将会发现这个新的模块并添加它: - -```python -from .my_optimizer import MyOptimizer -``` - -之后您可以在配置文件的 `optimizer` 域里使用 `MyOptimizer`, -如下所示,在配置文件里,优化器被 `optimizer` 域所定义: - -```python -optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) -``` - -为了使用您自己的优化器,域可以被修改为: - -```python -optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) -``` - -我们已经支持了 PyTorch 自带的全部优化器,唯一修改的地方是在配置文件里的 `optimizer` 域。例如,如果您想使用 `ADAM`,尽管数值表现会掉点,还是可以如下修改: - -```python -optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) -``` - -使用者可以直接按照 PyTorch [文档教程](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) 去设置参数。 - -## 定制优化器的构造器 (optimizer constructor) - -对于优化,一些模型可能会有一些特别定义的参数,例如批归一化 (BatchNorm) 层里面的权重衰减 (weight decay)。 -使用者可以通过定制优化器的构造器来微调这些细粒度的优化器参数。 - -```python -from mmcv.utils import build_from_cfg - -from mmcv.runner import OPTIMIZER_BUILDERS -from .cocktail_optimizer import CocktailOptimizer - - -@OPTIMIZER_BUILDERS.register_module -class CocktailOptimizerConstructor(object): - - def __init__(self, optim_wrapper_cfg, paramwise_cfg=None): - - def __call__(self, model): - - return my_optimizer - -``` - -## 开发和增加新的组件(Module) - -MMSegmentation 里主要有2种组件: - -- 主干网络 (backbone): 通常是卷积网络的堆叠,来做特征提取,例如 ResNet, HRNet -- 解码头 (decoder head): 用于语义分割图的解码的组件(得到分割结果) - -### 添加新的主干网络 - -这里我们以 MobileNet 为例,展示如何增加新的主干组件: - -1. 
创建一个新的文件 `mmseg/models/backbones/mobilenet.py` - -```python -import torch.nn as nn - -from ..registry import BACKBONES - - -@BACKBONES.register_module -class MobileNet(nn.Module): - - def __init__(self, arg1, arg2): - pass - - def forward(self, x): # should return a tuple - pass - - def init_weights(self, pretrained=None): - pass -``` - -2. 在 `mmseg/models/backbones/__init__.py` 里面导入模块 - -```python -from .mobilenet import MobileNet -``` - -3. 在您的配置文件里使用它 - -```python -model = dict( - ... - backbone=dict( - type='MobileNet', - arg1=xxx, - arg2=xxx), - ... -``` - -### 增加新的解码头 (decoder head)组件 - -在 MMSegmentation 里面,对于所有的分割头,我们提供一个基类解码头 [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) 。 -所有新建的解码头都应该继承它。这里我们以 [PSPNet](https://arxiv.org/abs/1612.01105) 为例, -展示如何开发和增加一个新的解码头组件: - -首先,在 `mmseg/models/decode_heads/psp_head.py` 里添加一个新的解码头。 -PSPNet 中实现了一个语义分割的解码头。为了实现一个解码头,我们只需要在新构造的解码头中实现如下的3个函数: - -```python -@HEADS.register_module() -class PSPHead(BaseDecodeHead): - - def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): - super(PSPHead, self).__init__(**kwargs) - - def init_weights(self): - - def forward(self, inputs): - -``` - -接着,使用者需要在 `mmseg/models/decode_heads/__init__.py` 里面添加这个模块,这样对应的注册器 (registry) 可以查找并加载它们。 - -PSPNet的配置文件如下所示: - -```python -norm_cfg = dict(type='SyncBN', requires_grad=True) -model = dict( - type='EncoderDecoder', - pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth', - backbone=dict( - type='ResNetV1c', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - dilations=(1, 1, 2, 4), - strides=(1, 2, 1, 1), - norm_cfg=norm_cfg, - norm_eval=False, - style='pytorch', - contract_dilation=True), - decode_head=dict( - type='PSPHead', - in_channels=2048, - in_index=3, - channels=512, - pool_scales=(1, 2, 3, 6), - dropout_ratio=0.1, - num_classes=19, - norm_cfg=norm_cfg, - align_corners=False, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=1.0))) - -``` - -### 增加新的损失函数 - -假设您想添加一个新的损失函数 `MyLoss` 到语义分割解码器里。 -为了添加一个新的损失函数,使用者需要在 `mmseg/models/losses/my_loss.py` 里面去实现它。 -`weighted_loss` 可以对计算损失时的每个样本做加权。 - -```python -import torch -import torch.nn as nn - -from ..builder import LOSSES -from .utils import weighted_loss - -@weighted_loss -def my_loss(pred, target): - assert pred.size() == target.size() and target.numel() > 0 - loss = torch.abs(pred - target) - return loss - -@LOSSES.register_module -class MyLoss(nn.Module): - - def __init__(self, reduction='mean', loss_weight=1.0): - super(MyLoss, self).__init__() - self.reduction = reduction - self.loss_weight = loss_weight - - def forward(self, - pred, - target, - weight=None, - avg_factor=None, - reduction_override=None): - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) - loss = self.loss_weight * my_loss( - pred, target, weight, reduction=reduction, avg_factor=avg_factor) - return loss -``` - -然后使用者需要在 `mmseg/models/losses/__init__.py` 里面添加它: - -```python -from .my_loss import MyLoss, my_loss - -``` - -为了使用它,修改 `loss_xxx` 域。之后您需要在解码头组件里修改 `loss_decode` 域。 -`loss_weight` 可以被用来对不同的损失函数做加权。 - -```python -loss_decode=dict(type='MyLoss', loss_weight=1.0)) -``` diff --git a/docs/zh_cn/advanced_guides/add_transforms.md b/docs/zh_cn/advanced_guides/add_transforms.md index 2fa55f0c05..58a2485e04 100644 --- a/docs/zh_cn/advanced_guides/add_transforms.md +++ b/docs/zh_cn/advanced_guides/add_transforms.md @@ -1,166 +1,3 @@ -# 自定义数据流程(待更新) +# 新增数据增强(待更新) -## 数据流程的设计 - -按照通常的惯例,我们使用 `Dataset` 和 `DataLoader` 做多线程的数据加载。`Dataset` 返回一个数据内容的字典,里面对应于模型前传方法的各个参数。 -因为在语义分割中,输入的图像数据具有不同的大小,我们在 MMCV 里引入一个新的 `DataContainer` 类别去帮助收集和分发不同大小的输入数据。 - -更多细节,请查看[这里](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) 。 - -数据的准备流程和数据集是解耦的。通常一个数据集定义了如何处理标注数据(annotations)信息,而一个数据流程定义了准备一个数据字典的所有步骤。一个流程包括了一系列操作,每个操作里都把一个字典作为输入,然后再输出一个新的字典给下一个变换操作。 - 
-这些操作可分为数据加载 (data loading),预处理 (pre-processing),格式变化 (formatting) 和测试时数据增强 (test-time augmentation)。 - -下面的例子就是 PSPNet 的一个流程: - -```python -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -crop_size = (512, 1024) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(2048, 1024), - # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -``` - -对于每个操作,我们列出它添加、更新、移除的相关字典域 (dict fields): - -### 数据加载 Data loading - -`LoadImageFromFile` - -- 增加: img, img_shape, ori_shape - -`LoadAnnotations` - -- 增加: gt_semantic_seg, seg_fields - -### 预处理 Pre-processing - -`Resize` - -- 增加: scale, scale_idx, pad_shape, scale_factor, keep_ratio -- 更新: img, img_shape, \*seg_fields - -`RandomFlip` - -- 增加: flip -- 更新: img, \*seg_fields - -`Pad` - -- 增加: pad_fixed_size, pad_size_divisor -- 更新: img, pad_shape, \*seg_fields - -`RandomCrop` - -- 更新: img, pad_shape, \*seg_fields - -`Normalize` - -- 增加: img_norm_cfg -- 更新: img - -`SegRescale` - -- 更新: gt_semantic_seg - -`PhotoMetricDistortion` - -- 更新: img - -### 格式 Formatting - -`ToTensor` - -- 更新: 由 `keys` 指定 - -`ImageToTensor` - -- 更新: 由 `keys` 指定 - -`Transpose` - -- 更新: 由 `keys` 指定 - -`ToDataContainer` - -- 更新: 由 `keys` 指定 - 
-`DefaultFormatBundle` - -- 更新: img, gt_semantic_seg - -`Collect` - -- 增加: img_meta (the keys of img_meta is specified by `meta_keys`) -- 移除: all other keys except for those specified by `keys` - -### 测试时数据增强 Test time augmentation - -`MultiScaleFlipAug` - -## 拓展和使用自定义的流程 - -1. 在任何一个文件里写一个新的流程,例如 `my_pipeline.py`,它以一个字典作为输入并且输出一个字典 - - ```python - from mmseg.datasets import PIPELINES - - @PIPELINES.register_module() - class MyTransform: - - def __call__(self, results): - results['dummy'] = True - return results - ``` - -2. 导入一个新类 - - ```python - from .my_pipeline import MyTransform - ``` - -3. 在配置文件里使用它 - - ```python - img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - crop_size = (512, 1024) - train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), - dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='PhotoMetricDistortion'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), - dict(type='MyTransform'), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_semantic_seg']), - ] - ``` +中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/add_transform.md) diff --git a/docs/zh_cn/advanced_guides/data_flow.md b/docs/zh_cn/advanced_guides/data_flow.md index 960b4e6586..0716d36d1b 100644 --- a/docs/zh_cn/advanced_guides/data_flow.md +++ b/docs/zh_cn/advanced_guides/data_flow.md @@ -1 +1,90 @@ # 数据流 + +在本章节中,我们将介绍 [Runner](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/runner.html) 管理的内部模块之间的数据流和数据格式约定。 + +## 数据流概述 + +[Runner](https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/design/runner.md) 相当于 MMEngine 中的“集成器”。它覆盖了框架的所有方面,并肩负着组织和调度几乎所有模块的责任,这意味着各模块之间的数据流也由 `Runner` 控制。 如 [MMEngine 中的 Runner 
文档](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/runner.html)所示,下图展示了基本的数据流。 + +![Basic dataflow](https://user-images.githubusercontent.com/112053249/199228350-5f80699e-7fd2-4b4c-ac32-0b16b1922c2e.png) + +虚线边框、灰色填充形状代表不同的数据格式,而实心框表示模块/方法。由于 MMEngine 极大的灵活性和可扩展性,一些重要的基类可以被继承,并且它们的方法可以被覆写。 上图所示数据流仅适用于当用户没有自定义 `Runner` 中的 `TrainLoop`、`ValLoop` 和 `TestLoop`,并且没有在其自定义模型中覆写 `train_step`、`val_step` 和 `test_step` 方法时。MMSegmentation 中 loop 的默认设置如下:使用`IterBasedTrainLoop` 训练模型,共计 20000 次迭代,并且在每 2000 次迭代后进行一次验证。 + +```python +train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +``` + +在上图中,红色线表示 [train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step))*** ,在每次训练迭代中,数据加载器(dataloader)从存储中加载图像并传输到数据预处理器(data preprocessor),数据预处理器会将图像放到特定的设备上,并将数据堆叠到批处理中,之后模型接受批处理数据作为输入,最后将模型的输出发送给优化器(optimizer)。蓝色线表示 [val_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#val_step) 和 [test_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#test_step) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#test_step))*** 。这两个过程的数据流除了模型输出与 `train_step` 不同外,其余均和 `train_step` 类似。由于在评估时模型参数会被冻结,因此模型的输出将被传递给 [Evaluator](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/evaluation.md#ioumetric) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/evaluation.md#ioumetric))*** +来计算指标。 + +## MMSegmentation 中的数据流约定 + +在上面的图中,我们可以看到基本的数据流。在本节中,我们将分别介绍数据流中涉及的数据的格式约定。 + +### 数据加载器到数据预处理器 + +数据加载器(DataLoader)是 MMEngine 的训练和测试流程中的一个重要组件。 +从概念上讲,它源于 [PyTorch](https://pytorch.org/) 并保持一致。DataLoader 
从文件系统加载数据,原始数据通过数据准备流程后被发送给数据预处理器。 + +MMSegmentation 在 [PackSegInputs](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/datasets/transforms/formatting.py#L12) 中定义了默认数据格式, 它是 `train_pipeline` 和 `test_pipeline` 的最后一个组件。有关数据转换 `pipeline` 的更多信息,请参阅[数据转换文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/transforms.html)。 ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/transforms.html))*** + +在没有任何修改的情况下,PackSegInputs 的返回值通常是一个包含 `inputs` 和 `data_samples` 的 `dict`。以下伪代码展示了 mmseg 中数据加载器输出的数据类型,它是从数据集中获取的一批数据样本,数据加载器将它们打包成一个字典列表。`inputs` 是输入进模型的张量列表,`data_samples` 包含了输入图像的 meta information 和相应的 ground truth。 + +```python +dict( + inputs=List[torch.Tensor], + data_samples=List[SegDataSample] +) +``` + +**注意:** [SegDataSample](https://github.com/open-mmlab/mmsegmentation/blob/1.x/mmseg/structures/seg_data_sample.py) 是 MMSegmentation 的数据结构接口,用于连接不同组件。`SegDataSample` 实现了抽象数据元素 `mmengine.structures.BaseDataElement`,更多信息请在 [MMEngine](https://github.com/open-mmlab/mmengine) 中参阅 [SegDataSample 文档](https://mmsegmentation.readthedocs.io/zh_CN/1.x/advanced_guides/structures.html)和[数据元素文档](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/data_element.html)。 + +### 数据预处理器到模型 + +虽然在[上面的图](##数据流概述)中分开绘制了数据预处理器和模型,但数据预处理器是模型的一部分,因此可以在[模型教程](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/models.html)中找到数据预处理器章节。 ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/models.html))*** + +数据预处理器的返回值是一个包含 `inputs` 和 `data_samples` 的字典,其中 `inputs` 是批处理图像的 4D 张量,`data_samples` 中添加了一些用于数据预处理的额外元信息。当传递给网络时,字典将被解包为两个值。 以下伪代码展示了数据预处理器的返回值和模型的输入值。 + +```python +dict( + inputs=torch.Tensor, + data_samples=List[SegDataSample] +) +``` + +```python +class Network(BaseSegmentor): + + def forward(self, inputs: torch.Tensor, data_samples: List[SegDataSample], mode: str): + pass +``` + +**注意:** 模型的前向传播有 3 种模式,由输入参数 mode 
控制,更多信息请参阅[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md)。 ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md))*** + +### 模型输出 + +如[模型教程](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#forward) ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#forward))*** 所提到的 3 种前向传播具有 3 种输出。 +`train_step` 和 `test_step`(或 `val_step`)分别对应于 `'loss'` 和 `'predict'`。 + +在 `test_step` 或 `val_step` 中,推理结果会被传递给 `Evaluator` 。您可以参阅[评估文档](https://mmsegmentation.readthedocs.io/en/dev-1.x/advanced_guides/evaluation.html) ***([中文链接待更新](https://mmsegmentation.readthedocs.io/zh_CN/dev-1.x/advanced_guides/evaluation.html))*** 来获取更多关于 `Evaluator` 的信息。 + +在推理后,MMSegmentation 中的 [BaseSegmentor](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L15) 会对推理结果进行简单的后处理以打包推理结果。神经网络生成的分割 logits,经过 `argmax` 操作后的分割 mask 和 ground truth(如果存在)将被打包到类似 `SegDataSample` 的实例。 [postprocess_result](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/segmentors/base.py#L132) 的返回值是一个 **`SegDataSample`的`List`**。下图显示了这些 `SegDataSample` 实例的关键属性。 + +![SegDataSample](https://user-images.githubusercontent.com/15952744/209912225-ab46a8d9-904a-43cb-8bf1-8bec4938ed29.png) + +与数据预处理器一致,损失函数也是模型的一部分,它是[解码头](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L142)的属性之一。 + +在 MMSegmentation 中,`decode_head` 的 [loss_by_feat](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/models/decode_heads/decode_head.py#L291) 方法是用于计算损失的统一接口。 + +参数: + +- seg_logits (Tensor):解码头前向函数的输出 +- batch_data_samples (List\[SegDataSample\]):分割数据样本,通常包括如 `metainfo` 和 `gt_sem_seg` 等信息 + +返回值: + +- dict\[str, Tensor\]:一个损失组件的字典 + +**注意:** `train_step` 将损失传递进 OptimWrapper 以更新模型中的权重,更多信息请参阅 
[train_step](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/en/advanced_guides/models.md#train_step)。 ***([中文链接待更新](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/docs/zh_cn/advanced_guides/models.md#train_step))*** diff --git a/docs/zh_cn/advanced_guides/datasets.md b/docs/zh_cn/advanced_guides/datasets.md index 0f3ad2b682..546e97f70d 100644 --- a/docs/zh_cn/advanced_guides/datasets.md +++ b/docs/zh_cn/advanced_guides/datasets.md @@ -9,9 +9,10 @@ 实例化 Cityscapes 训练数据集: ```python +from mmengine.registry import init_default_scope from mmseg.datasets import CityscapesDataset -from mmseg.utils import register_all_modules -register_all_modules() + +init_default_scope('mmseg') data_root = 'data/cityscapes/' data_prefix=dict(img_path='leftImg8bit/train', seg_map_path='gtFine/train') diff --git a/docs/zh_cn/advanced_guides/evaluation.md b/docs/zh_cn/advanced_guides/evaluation.md index d07fcf1048..a82311ccc7 100644 --- a/docs/zh_cn/advanced_guides/evaluation.md +++ b/docs/zh_cn/advanced_guides/evaluation.md @@ -1 +1,3 @@ -# 模型评测 +# 模型评测 + +中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/evaluation.md) diff --git a/docs/zh_cn/advanced_guides/index.rst b/docs/zh_cn/advanced_guides/index.rst index b706742603..2aec1ac9cf 100644 --- a/docs/zh_cn/advanced_guides/index.rst +++ b/docs/zh_cn/advanced_guides/index.rst @@ -19,7 +19,7 @@ .. 
toctree:: :maxdepth: 1 - add_modules.md + add_models.md add_datasets.md add_transforms.md add_metrics.md diff --git a/docs/zh_cn/advanced_guides/models.md b/docs/zh_cn/advanced_guides/models.md index bebf4ef44f..62dbea38c4 100644 --- a/docs/zh_cn/advanced_guides/models.md +++ b/docs/zh_cn/advanced_guides/models.md @@ -1 +1,3 @@ # 模型 + +中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/models.md) diff --git a/docs/zh_cn/advanced_guides/transforms.md b/docs/zh_cn/advanced_guides/transforms.md index c96b688bd9..1cbe79ba49 100644 --- a/docs/zh_cn/advanced_guides/transforms.md +++ b/docs/zh_cn/advanced_guides/transforms.md @@ -1 +1,3 @@ -# 数据增广 +# 数据增强变化 + +中文版文档支持中,请先阅读[英文版本](../../en/advanced_guides/transforms.md) diff --git a/docs/zh_cn/api.rst b/docs/zh_cn/api.rst index 94f64313d0..3478aa9361 100644 --- a/docs/zh_cn/api.rst +++ b/docs/zh_cn/api.rst @@ -11,11 +11,6 @@ datasets .. automodule:: mmseg.datasets :members: -samplers -^^^^^^^^^^ -.. automodule:: mmseg.datasets.samplers - :members: - transforms ^^^^^^^^^^^^ .. 
automodule:: mmseg.datasets.transforms diff --git a/docs/zh_cn/device/npu.md b/docs/zh_cn/device/npu.md new file mode 100644 index 0000000000..d50439d040 --- /dev/null +++ b/docs/zh_cn/device/npu.md @@ -0,0 +1,39 @@ +# NPU (华为 昇腾) + +## 使用方法 + +请参考 [MMCV 的安装文档](https://mmcv.readthedocs.io/en/latest/get_started/build.html#build-mmcv-full-on-ascend-npu-machine) 来安装 NPU 版本的 MMCV。 + +以下展示单机四卡场景的运行指令: + +```shell +bash tools/dist_train.sh configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py 4 +``` + +以下展示单机单卡下的运行指令: + +```shell +python tools/train.py configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py +``` + +## 模型验证结果 + +| Model | mIoU | Config | Download | +| :-----------------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------ | +| [deeplabv3](<>) | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024_20230115_205626.json) | +| [deeplabv3plus](<>) | 79.23 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024_20230116_043450.json) | +| [hrnet](<>) | 78.1 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/hrnet/fcn_hr18_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_hr18_4xb2-40k_cityscapes-512x1024_20230116_215821.json) | +| [fcn](<>) | 74.15 | 
[config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/fcn_r50-d8_4xb2-40k_cityscapes-512x1024_20230111_083014.json) | +| [icnet](<>) | 69.25 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/icnet/icnet_r50-d8_4xb2-80k_cityscapes-832x832.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/icnet_r50-d8_4xb2-80k_cityscapes-832x832_20230119_002929.json) | +| [pspnet](<>) | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/pspnet/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/pspnet_r50b-d8_4xb2-80k_cityscapes-512x1024_20230114_042721.json) | +| [unet](<>) | 68.86 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024_20230129_224750.json) | +| [upernet](<>) | 77.81 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/upernet/upernet_r50_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/upernet_r50_4xb2-40k_cityscapes-512x1024_20230129_014634.json) | +| [apcnet](<>) | 78.02 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/apcnet/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/apcnet_r50-d8_4xb2-40k_cityscapes-512x1024_20230209_212545.json) | +| [bisenetv1](<>) | 76.04 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv1/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv1_r50-d32_4xb4-160k_cityscapes-1024x1024_20230201_023946.json) | +| 
[bisenetv2](<>) | 72.44 | [config](https://github.com/open-mmlab/mmsegmentation/tree/1.x/configs/bisenetv2/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024.py) | [log](https://download.openmmlab.com/mmsegmentation/v0.5/device/npu/bisenetv2_fcn_4xb4-amp-160k_cityscapes-1024x1024_20230205_215606.json) | + +**注意:** + +- 如果没有特别标记,NPU 上的使用混合精度训练的结果与使用 FP32 的 GPU 上的结果相同。 + +**以上模型结果由华为昇腾团队提供** diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index fff70a2560..da6d728a15 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -34,7 +34,7 @@ conda install pytorch torchvision cpuonly -c pytorch ## 安装 -我们建议用户遵循我们的最佳实践来安装 MMSegmentation 。但是整个过程是高度自定义的。更多信息请参见[自定义安装](#自定义安装)部分。 +我们建议用户遵循我们的最佳实践来安装 MMSegmentation 。但是整个过程是高度自定义的。更多信息请参见[自定义安装](##自定义安装)部分。 ### 最佳实践 @@ -92,10 +92,8 @@ python demo/image_demo.py demo/demo.png configs/pspnet/pspnet_r50-d8_4xb2-40k_ci ```python from mmseg.apis import inference_model, init_model, show_result_pyplot -from mmseg.utils import register_all_modules import mmcv -register_all_modules() config_file = 'pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py' checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth' diff --git a/docs/zh_cn/migration.md b/docs/zh_cn/migration.md index 5168bb0bd5..3f19b26714 100644 --- a/docs/zh_cn/migration.md +++ b/docs/zh_cn/migration.md @@ -1 +1,3 @@ # 迁移文档 + +中文迁移文档在支持中,请先阅读[英文版迁移文档](../en/migration/) diff --git a/docs/zh_cn/modelzoo_statistics.md b/docs/zh_cn/modelzoo_statistics.md new file mode 100644 index 0000000000..b057575a25 --- /dev/null +++ b/docs/zh_cn/modelzoo_statistics.md @@ -0,0 +1,102 @@ +# 模型库统计数据 + +- 论文数量: 47 + + - ALGORITHM: 36 + - BACKBONE: 11 + +- 模型数量: 612 + + - \[ALGORITHM\] [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) (16 ckpts) + + - \[ALGORITHM\] [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) (12 ckpts) + + - \[BACKBONE\] 
[BEiT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit) (2 ckpts) + + - \[ALGORITHM\] [BiSeNetV1](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1) (11 ckpts) + + - \[ALGORITHM\] [BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2) (4 ckpts) + + - \[ALGORITHM\] [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) (16 ckpts) + + - \[ALGORITHM\] [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) (2 ckpts) + + - \[BACKBONE\] [ConvNeXt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext) (6 ckpts) + + - \[ALGORITHM\] [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) (16 ckpts) + + - \[ALGORITHM\] [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) (41 ckpts) + + - \[ALGORITHM\] [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) (42 ckpts) + + - \[ALGORITHM\] [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) (12 ckpts) + + - \[ALGORITHM\] [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) (12 ckpts) + + - \[ALGORITHM\] [DPT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt) (1 ckpts) + + - \[ALGORITHM\] [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) (4 ckpts) + + - \[ALGORITHM\] [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) (12 ckpts) + + - \[ALGORITHM\] [ERFNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet) (1 ckpts) + + - \[ALGORITHM\] [FastFCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn) (12 ckpts) + + - \[ALGORITHM\] [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) (1 ckpts) + + - \[ALGORITHM\] 
[FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) (41 ckpts) + + - \[ALGORITHM\] [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) (16 ckpts) + + - \[BACKBONE\] [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) (37 ckpts) + + - \[ALGORITHM\] [ICNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet) (12 ckpts) + + - \[ALGORITHM\] [ISANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet) (16 ckpts) + + - \[ALGORITHM\] [K-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet) (7 ckpts) + + - \[BACKBONE\] [MAE](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae) (1 ckpts) + + - \[ALGORITHM\] [Mask2Former](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mask2former) (13 ckpts) + + - \[ALGORITHM\] [MaskFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/maskformer) (4 ckpts) + + - \[BACKBONE\] [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) (8 ckpts) + + - \[BACKBONE\] [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) (4 ckpts) + + - \[ALGORITHM\] [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) (16 ckpts) + + - \[ALGORITHM\] [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) (24 ckpts) + + - \[ALGORITHM\] [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) (4 ckpts) + + - \[BACKBONE\] [PoolFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/poolformer) (5 ckpts) + + - \[ALGORITHM\] [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) (16 ckpts) + + - \[ALGORITHM\] [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) (54 ckpts) + + - \[BACKBONE\] 
[ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) (8 ckpts) + + - \[ALGORITHM\] [SegFormer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer) (13 ckpts) + + - \[ALGORITHM\] [Segmenter](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter) (5 ckpts) + + - \[ALGORITHM\] [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) (4 ckpts) + + - \[ALGORITHM\] [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr) (7 ckpts) + + - \[ALGORITHM\] [STDC](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc) (4 ckpts) + + - \[BACKBONE\] [Swin Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin) (6 ckpts) + + - \[BACKBONE\] [Twins](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins) (12 ckpts) + + - \[ALGORITHM\] [UNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet) (25 ckpts) + + - \[ALGORITHM\] [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) (16 ckpts) + + - \[BACKBONE\] [Vision Transformer](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit) (11 ckpts) diff --git a/docs/zh_cn/user_guides/2_dataset_prepare.md b/docs/zh_cn/user_guides/2_dataset_prepare.md index a8dde9211a..c9c3606977 100644 --- a/docs/zh_cn/user_guides/2_dataset_prepare.md +++ b/docs/zh_cn/user_guides/2_dataset_prepare.md @@ -1,406 +1,3 @@ ## 准备数据集(待更新) -推荐用软链接, 将数据集根目录链接到 `$MMSEGMENTATION/data` 里. 如果您的文件夹结构是不同的, 您也许可以试着修改配置文件里对应的路径. 
- -```none -mmsegmentation -├── mmseg -├── tools -├── configs -├── data -│ ├── cityscapes -│ │ ├── leftImg8bit -│ │ │ ├── train -│ │ │ ├── val -│ │ ├── gtFine -│ │ │ ├── train -│ │ │ ├── val -│ ├── VOCdevkit -│ │ ├── VOC2012 -│ │ │ ├── JPEGImages -│ │ │ ├── SegmentationClass -│ │ │ ├── ImageSets -│ │ │ │ ├── Segmentation -│ │ ├── VOC2010 -│ │ │ ├── JPEGImages -│ │ │ ├── SegmentationClassContext -│ │ │ ├── ImageSets -│ │ │ │ ├── SegmentationContext -│ │ │ │ │ ├── train.txt -│ │ │ │ │ ├── val.txt -│ │ │ ├── trainval_merged.json -│ │ ├── VOCaug -│ │ │ ├── dataset -│ │ │ │ ├── cls -│ ├── ade -│ │ ├── ADEChallengeData2016 -│ │ │ ├── annotations -│ │ │ │ ├── training -│ │ │ │ ├── validation -│ │ │ ├── images -│ │ │ │ ├── training -│ │ │ │ ├── validation -│ ├── CHASE_DB1 -│ │ ├── images -│ │ │ ├── training -│ │ │ ├── validation -│ │ ├── annotations -│ │ │ ├── training -│ │ │ ├── validation -│ ├── DRIVE -│ │ ├── images -│ │ │ ├── training -│ │ │ ├── validation -│ │ ├── annotations -│ │ │ ├── training -│ │ │ ├── validation -│ ├── HRF -│ │ ├── images -│ │ │ ├── training -│ │ │ ├── validation -│ │ ├── annotations -│ │ │ ├── training -│ │ │ ├── validation -│ ├── STARE -│ │ ├── images -│ │ │ ├── training -│ │ │ ├── validation -│ │ ├── annotations -│ │ │ ├── training -│ │ │ ├── validation -| ├── dark_zurich -| │   ├── gps -| │   │   ├── val -| │   │   └── val_ref -| │   ├── gt -| │   │   └── val -| │   ├── LICENSE.txt -| │   ├── lists_file_names -| │   │   ├── val_filenames.txt -| │   │   └── val_ref_filenames.txt -| │   ├── README.md -| │   └── rgb_anon -| │   | ├── val -| │   | └── val_ref -| ├── NighttimeDrivingTest -| | ├── gtCoarse_daytime_trainvaltest -| | │   └── test -| | │   └── night -| | └── leftImg8bit -| | | └── test -| | | └── night -│ ├── loveDA -│ │ ├── img_dir -│ │ │ ├── train -│ │ │ ├── val -│ │ │ ├── test -│ │ ├── ann_dir -│ │ │ ├── train -│ │ │ ├── val -│ ├── potsdam -│ │ ├── img_dir -│ │ │ ├── train -│ │ │ ├── val -│ │ ├── ann_dir -│ │ │ ├── train -│ │ │ ├── 
val -│ ├── vaihingen -│ │ ├── img_dir -│ │ │ ├── train -│ │ │ ├── val -│ │ ├── ann_dir -│ │ │ ├── train -│ │ │ ├── val -│ ├── iSAID -│ │ ├── img_dir -│ │ │ ├── train -│ │ │ ├── val -│ │ │ ├── test -│ │ ├── ann_dir -│ │ │ ├── train -│ │ │ ├── val -│ ├── synapse -│ │ ├── img_dir -│ │ │ ├── train -│ │ │ ├── val -│ │ ├── ann_dir -│ │ │ ├── train -│ │ │ ├── val -``` - -### Cityscapes - -注册成功后, 数据集可以在 [这里](https://www.cityscapes-dataset.com/downloads/) 下载. - -通常情况下, `**labelTrainIds.png` 被用来训练 cityscapes. -基于 [cityscapesscripts](https://github.com/mcordts/cityscapesScripts), -我们提供了一个 [脚本](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/cityscapes.py), -去生成 `**labelTrainIds.png`. - -```shell -# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略. -python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8 -``` - -### Pascal VOC - -Pascal VOC 2012 可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) 下载. -此外, 许多最近在 Pascal VOC 数据集上的工作都会利用增广的数据, 它们可以在 [这里](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz) 找到. - -如果您想使用增广后的 VOC 数据集, 请运行下面的命令来将数据增广的标注转成正确的格式. - -```shell -# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略. -python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8 -``` - -关于如何拼接数据集 (concatenate) 并一起训练它们, 更多细节请参考 [拼接连接数据集](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/tutorials/customize_datasets.md#%E6%8B%BC%E6%8E%A5%E6%95%B0%E6%8D%AE%E9%9B%86) . - -### ADE20K - -ADE20K 的训练集和验证集可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip) 下载. -您还可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip) 下载验证集. - -### Pascal Context - -Pascal Context 的训练集和验证集可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar) 下载. -注册成功后, 您还可以在 [这里](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) 下载验证集. 
- -为了从原始数据集里切分训练集和验证集, 您可以在 [这里](https://codalabuser.blob.core.windows.net/public/trainval_merged.json) -下载 trainval_merged.json. - -如果您想使用 Pascal Context 数据集, -请安装 [细节](https://github.com/zhanghang1989/detail-api) 然后再运行如下命令来把标注转换成正确的格式. - -```shell -python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json -``` - -### CHASE DB1 - -CHASE DB1 的训练集和验证集可以在 [这里](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip) 下载. - -为了将 CHASE DB1 数据集转换成 MMSegmentation 的格式,您需要运行如下命令: - -```shell -python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip -``` - -这个脚本将自动生成正确的文件夹结构. - -### DRIVE - -DRIVE 的训练集和验证集可以在 [这里](https://drive.grand-challenge.org/) 下载. -在此之前, 您需要注册一个账号, 当前 '1st_manual' 并未被官方提供, 因此需要您从其他地方获取. - -为了将 DRIVE 数据集转换成 MMSegmentation 格式, 您需要运行如下命令: - -```shell -python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip -``` - -这个脚本将自动生成正确的文件夹结构. - -### HRF - -首先, 下载 [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip) [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) 以及 [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip). - -为了将 HRF 数据集转换成 MMSegmentation 格式, 您需要运行如下命令: - -```shell -python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip -``` - -这个脚本将自动生成正确的文件夹结构. 
- -### STARE - -首先, 下载 [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) 和 [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar). - -为了将 STARE 数据集转换成 MMSegmentation 格式, 您需要运行如下命令: - -```shell -python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar -``` - -这个脚本将自动生成正确的文件夹结构. - -### Dark Zurich - -因为我们只支持在此数据集上测试模型, 所以您只需下载[验证集](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip). - -### Nighttime Driving - -因为我们只支持在此数据集上测试模型,所以您只需下载[测试集](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip). - -### LoveDA - -可以从 Google Drive 里下载 [LoveDA数据集](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing). - -或者它还可以从 [zenodo](https://zenodo.org/record/5706578#.YZvN7SYRXdF) 下载, 您需要运行如下命令: - -```shell -# Download Train.zip -wget https://zenodo.org/record/5706578/files/Train.zip -# Download Val.zip -wget https://zenodo.org/record/5706578/files/Val.zip -# Download Test.zip -wget https://zenodo.org/record/5706578/files/Test.zip -``` - -对于 LoveDA 数据集,请运行以下命令下载并重新组织数据集: - -```shell -python tools/convert_datasets/loveda.py /path/to/loveDA -``` - -请参照 [这里](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/inference.md) 来使用训练好的模型去预测 LoveDA 测试集并且提交到官网. - -关于 LoveDA 的更多细节可以在[这里](https://github.com/Junjue-Wang/LoveDA) 找到. - -### ISPRS Potsdam - -[Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/) -数据集是一个有着2D 语义分割内容标注的城市遥感数据集. -数据集可以从挑战[主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/) 获得. -需要其中的 `2_Ortho_RGB.zip` 和 `5_Labels_all_noBoundary.zip`. - -对于 Potsdam 数据集,请运行以下命令下载并重新组织数据集 - -```shell -python tools/convert_datasets/potsdam.py /path/to/potsdam -``` - -使用我们默认的配置, 将生成 3,456 张图片的训练集和 2,016 张图片的验证集. 
- -### ISPRS Vaihingen - -[Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/) -数据集是一个有着2D 语义分割内容标注的城市遥感数据集. - -数据集可以从挑战 [主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/). -需要其中的 'ISPRS_semantic_labeling_Vaihingen.zip' 和 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip'. - -对于 Vaihingen 数据集, 请运行以下命令下载并重新组织数据集 - -```shell -python tools/convert_datasets/vaihingen.py /path/to/vaihingen -``` - -使用我们默认的配置 (`clip_size`=512, `stride_size`=256), 将生成 344 张图片的训练集和 398 张图片的验证集. - -### iSAID - -iSAID 数据集(训练集/验证集/测试集)的图像可以从 [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) 下载. - -iSAID 数据集(训练集/验证集)的注释可以从 [iSAID](https://captain-whu.github.io/iSAID/dataset.html) 下载. - -该数据集是一个大规模的实例分割(也可以用于语义分割)的遥感数据集. - -下载后, 在数据集转换前, 您需要将数据集文件夹调整成如下格式. - -``` -│ ├── iSAID -│ │ ├── train -│ │ │ ├── images -│ │ │ │ ├── part1.zip -│ │ │ │ ├── part2.zip -│ │ │ │ ├── part3.zip -│ │ │ ├── Semantic_masks -│ │ │ │ ├── images.zip -│ │ ├── val -│ │ │ ├── images -│ │ │ │ ├── part1.zip -│ │ │ ├── Semantic_masks -│ │ │ │ ├── images.zip -│ │ ├── test -│ │ │ ├── images -│ │ │ │ ├── part1.zip -│ │ │ │ ├── part2.zip -``` - -```shell -python tools/convert_datasets/isaid.py /path/to/iSAID -``` - -使用我们默认的配置 (`patch_width`=896, `patch_height`=896, `overlap_area`=384), 将生成 33,978 张图片的训练集和 11,644 张图片的验证集. - -## Synapse dataset - -这个数据集可以在这个[网页](https://www.synapse.org/#!Synapse:syn3193805/wiki/) 里被下载. -我们参考了 [TransUNet](https://arxiv.org/abs/2102.04306) 里面的数据集预处理的设置, 它将原始数据集 (30 套 3D 样例) 切分出 18 套用于训练, 12 套用于验证. 请参考以下步骤来准备该数据集: - -```shell -unzip RawData.zip -cd ./RawData/Training -``` - -随后新建 `train.txt` 和 `val.txt`. 
- -根据 TransUNet 来将训练集和验证集如下划分: - -train.txt - -```none -img0005.nii.gz -img0006.nii.gz -img0007.nii.gz -img0009.nii.gz -img0010.nii.gz -img0021.nii.gz -img0023.nii.gz -img0024.nii.gz -img0026.nii.gz -img0027.nii.gz -img0028.nii.gz -img0030.nii.gz -img0031.nii.gz -img0033.nii.gz -img0034.nii.gz -img0037.nii.gz -img0039.nii.gz -img0040.nii.gz -``` - -val.txt - -```none -img0008.nii.gz -img0022.nii.gz -img0038.nii.gz -img0036.nii.gz -img0032.nii.gz -img0002.nii.gz -img0029.nii.gz -img0003.nii.gz -img0001.nii.gz -img0004.nii.gz -img0025.nii.gz -img0035.nii.gz -``` - -此时, synapse 数据集包括了以下内容: - -```none -├── Training -│ ├── img -│ │ ├── img0001.nii.gz -│ │ ├── img0002.nii.gz -│ │ ├── ... -│ ├── label -│ │ ├── label0001.nii.gz -│ │ ├── label0002.nii.gz -│ │ ├── ... -│ ├── train.txt -│ ├── val.txt -``` - -随后, 运行下面的数据集转换脚本来处理 synapse 数据集: - -```shell -python tools/dataset_converters/synapse.py --dataset-path /path/to/synapse -``` - -使用我们默认的配置, 将生成 2,211 张 2D 图片的训练集和 1,568 张图片的验证集. - -需要注意的是 MMSegmentation 默认的评价指标 (例如平均 Dice 值) 都是基于每帧 2D 图片计算的, 这与基于每套 3D 图片计算评价指标的 [TransUNet](https://arxiv.org/abs/2102.04306) 是不同的. 
+中文版文档支持中,请先阅读[英文版本](../../en/user_guides/2_dataset_prepare.md) diff --git a/docs/zh_cn/user_guides/3_inference.md b/docs/zh_cn/user_guides/3_inference.md index b90f73420c..d2fe60076f 100644 --- a/docs/zh_cn/user_guides/3_inference.md +++ b/docs/zh_cn/user_guides/3_inference.md @@ -1,127 +1,3 @@ ## 使用预训练模型推理(待更新) -我们提供测试脚本来评估完整数据集(Cityscapes, PASCAL VOC, ADE20k 等)上的结果,同时为了使其他项目的整合更容易,也提供一些高级 API。 - -### 测试一个数据集 - -- 单卡 GPU -- CPU -- 单节点多卡 GPU -- 多节点 - -您可以使用以下命令来测试一个数据集。 - -```shell -# 单卡 GPU 测试 -python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show] - -# CPU: 如果机器没有 GPU, 则跟上述单卡 GPU 测试一致 -# CPU: 如果机器有 GPU, 那么先禁用 GPU 再运行单 GPU 测试脚本 -export CUDA_VISIBLE_DEVICES=-1 # 禁用 GPU -python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show] - -# 多卡GPU 测试 -./tools/dist_test.sh ${配置文件} ${检查点文件} ${GPU数目} [--out ${结果文件}] [--eval ${评估指标}] -``` - -可选参数: - -- `RESULT_FILE`: pickle 格式的输出结果的文件名,如果不专门指定,结果将不会被专门保存成文件。(MMseg v0.17 之后,args.out 将只会保存评估时的中间结果或者是分割图的保存路径。) -- `EVAL_METRICS`: 在结果里将被评估的指标。这主要取决于数据集, `mIoU` 对于所有数据集都可获得,像 Cityscapes 数据集可以通过 `cityscapes` 命令来专门评估,就像标准的 `mIoU`一样。 -- `--show`: 如果被指定,分割结果将会在一张图像里画出来并且在另一个窗口展示。它仅仅是用来调试与可视化,并且仅针对单卡 GPU 测试。请确认 GUI 在您的环境里可用,否则您也许会遇到报错 `cannot connect to X server` -- `--show-dir`: 如果被指定,分割结果将会在一张图像里画出来并且保存在指定文件夹里。它仅仅是用来调试与可视化,并且仅针对单卡GPU测试。使用该参数时,您的环境不需要 GUI。 -- `--eval-options`: 评估时的可选参数,当设置 `efficient_test=True` 时,它将会保存中间结果至本地文件里以节约 CPU 内存。请确认您本地硬盘有足够的存储空间(大于20GB)。(MMseg v0.17 之后,`efficient_test` 不再生效,我们重构了 test api,通过使用一种渐近式的方式来提升评估和保存结果的效率。) - -例子: - -假设您已经下载检查点文件至文件夹 `checkpoints/` 里。 - -1. 测试 PSPNet 并可视化结果。按下任何键会进行到下一张图 - - ```shell - python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ - checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ - --show - ``` - -2. 
测试 PSPNet 并保存画出的图以便于之后的可视化 - - ```shell - python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ - checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ - --show-dir psp_r50_512x1024_40ki_cityscapes_results - ``` - -3. 在数据集 PASCAL VOC (不保存测试结果) 上测试 PSPNet 并评估 mIoU - - ```shell - python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \ - checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \ - --eval mAP - ``` - -4. 使用4卡 GPU 测试 PSPNet,并且在标准 mIoU 和 cityscapes 指标里评估模型 - - ```shell - ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ - checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ - 4 --out results.pkl --eval mIoU cityscapes - ``` - - 注意:在 cityscapes mIoU 和我们的 mIoU 指标会有一些差异 (~0.1%) 。因为 cityscapes 默认是根据类别样本数的多少进行加权平均,而我们对所有的数据集都是采取直接平均的方法来得到 mIoU。 - -5. 在 cityscapes 数据集上4卡 GPU 测试 PSPNet, 并生成 png 文件以便提交给官方评估服务器 - - 首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py`, - - ```python - data = dict( - test=dict( - img_dir='leftImg8bit/test', - ann_dir='gtFine/test')) - ``` - - 随后,进行测试。 - - ```shell - ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ - checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ - 4 --format-only --eval-options "imgfile_prefix=./pspnet_test_results" - ``` - - 您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。 - 您也许可以运行 `zip -r results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://www.cityscapes-dataset.com/submit/) 。 - -6. 
在 Cityscapes 数据集上使用 CPU 高效内存选项来测试 DeeplabV3+ `mIoU` 指标 (没有保存测试结果) - - ```shell - python tools/test.py \ - configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py \ - deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \ - --eval-options efficient_test=True \ - --eval mIoU - ``` - - 使用 `pmap` 可查看 CPU 内存情况, `efficient_test=True` 会使用约 2.25GB 的 CPU 内存, `efficient_test=False` 会使用约 11.06GB 的 CPU 内存。 这个可选参数可以节约很多 CPU 内存。(MMseg v0.17 之后, `efficient_test` 参数将不再生效, 我们使用了一种渐近的方式来更加有效快速地评估和保存结果。) - -7. 在 LoveDA 数据集上1卡 GPU 测试 PSPNet, 并生成 png 文件以便提交给官方评估服务器 - - 首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py`, - - ```python - data = dict( - test=dict( - img_dir='img_dir/test', - ann_dir='ann_dir/test')) - ``` - - 随后,进行测试。 - - ```shell - python ./tools/test.py configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py \ - checkpoints/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth \ - --format-only --eval-options "imgfile_prefix=./pspnet_test_results" - ``` - - 您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。 - 您也许可以运行 `zip -r -j Results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://codalab.lisn.upsaclay.fr/competitions/421) 。 +中文版文档支持中,请先阅读[英文版本](../../en/user_guides/3_inference.md) diff --git a/docs/zh_cn/user_guides/4_train_test.md b/docs/zh_cn/user_guides/4_train_test.md index b26132e765..309e046b2d 100644 --- a/docs/zh_cn/user_guides/4_train_test.md +++ b/docs/zh_cn/user_guides/4_train_test.md @@ -43,7 +43,7 @@ python tools/train.py ${配置文件} --resume --cfg-options load_from=${检查 export CUDA_VISIBLE_DEVICES=-1 ``` -然后运行[上方](#在单GPU上训练)脚本。 +然后运行[上方](###在单GPU上训练)脚本。 ### 在单GPU上测试 @@ -69,7 +69,7 @@ python tools/test.py ${配置文件} ${模型权重文件} [可选参数] export CUDA_VISIBLE_DEVICES=-1 ``` -然后运行[上方](#在单GPU上测试)脚本。 +然后运行[上方](###在单GPU上测试)脚本。 ## 多GPU、多机器上训练和测试 @@ -85,7 +85,7 @@ OpenMMLab2.0 通过 `MMDistributedDataParallel`实现 **分布式** 训练。 sh tools/dist_train.sh ${配置文件} ${GPU数量} [可选参数] ``` 
-可选参数与[上方](#在单GPU上训练)相同并且还增加了可以指定gpu数量的参数。 +可选参数与[上方](###在单GPU上训练)相同并且还增加了可以指定gpu数量的参数。 示例: @@ -112,7 +112,7 @@ ln -s ${您的工作路径} ${MMSEG 路径}/work_dirs sh tools/dist_test.sh ${配置文件} ${检查点文件} ${GPU数量} [可选参数] ``` -可选参数与[上方](#在单GPU上测试)相同并且增加了可以指定 gpu 数量的参数。 +可选参数与[上方](###在单GPU上测试)相同并且增加了可以指定 gpu 数量的参数。 示例: diff --git a/docs/zh_cn/user_guides/index.rst b/docs/zh_cn/user_guides/index.rst index dacac79698..d0a313d31e 100644 --- a/docs/zh_cn/user_guides/index.rst +++ b/docs/zh_cn/user_guides/index.rst @@ -18,3 +18,4 @@ visualization.md useful_tools.md deployment.md + visualization_feature_map.md diff --git a/docs/zh_cn/user_guides/visualization.md b/docs/zh_cn/user_guides/visualization.md index ac8b9e289f..2ef020ba85 100644 --- a/docs/zh_cn/user_guides/visualization.md +++ b/docs/zh_cn/user_guides/visualization.md @@ -69,7 +69,7 @@ default_hooks = dict( work_dirs/test_visual/20220810_115248/vis_data/vis_image ``` -另外,如果在 `vis_backends` 中添加 `TensorboardVisBackend` ,如 [TensorBoard 的配置](#tensorboard-configuration),我们还可以运行下面的命令在 TensorBoard 中查看它们: +另外,如果在 `vis_backends` 中添加 `TensorboardVisBackend` ,如 [TensorBoard 的配置](###TensorBoard的配置),我们还可以运行下面的命令在 TensorBoard 中查看它们: ```shell tensorboard --logdir work_dirs/test_visual/20220810_115248/vis_data diff --git a/docs/zh_cn/user_guides/visualization_feature_map.md b/docs/zh_cn/user_guides/visualization_feature_map.md new file mode 100644 index 0000000000..fda99bb5ab --- /dev/null +++ b/docs/zh_cn/user_guides/visualization_feature_map.md @@ -0,0 +1,201 @@ +# wandb记录特征图可视化 + +MMSegmentation 1.x 提供了 Weights & Biases 的后端支持,方便对项目代码结果的可视化和管理。 + +## Wandb的配置 + +安装 Weights & Biases 的过程可以参考 [官方安装指南](https://docs.wandb.ai/quickstart),具体的步骤如下: + +```shell +pip install wandb +wandb login +``` + +在 `vis_backend` 中添加 `WandbVisBackend`。 + +```python +vis_backends=[dict(type='LocalVisBackend'), + dict(type='TensorboardVisBackend'), + dict(type='WandbVisBackend')] +``` + +## 测试数据和结果及特征图的可视化 + +`SegLocalVisualizer` 是继承自 MMEngine 中 `Visualizer` 
类的子类,适用于 MMSegmentation 可视化,有关 `Visualizer` 的详细信息请参考在 MMEngine 中的[可视化教程](https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/visualization.html) 。 + +以下是一个关于 `SegLocalVisualizer` 的示例,首先你可以使用下面的命令下载这个案例中的数据: + +
+ +
+ +```shell +wget https://user-images.githubusercontent.com/24582831/189833109-eddad58f-f777-4fc0-b98a-6bd429143b06.png --output-document aachen_000000_000019_leftImg8bit.png +wget https://user-images.githubusercontent.com/24582831/189833143-15f60f8a-4d1e-4cbb-a6e7-5e2233869fac.png --output-document aachen_000000_000019_gtFine_labelTrainIds.png + +wget https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth + +``` + +```python +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser +from typing import Type + +import mmcv +import torch +import torch.nn as nn + +from mmengine.model import revert_sync_batchnorm +from mmengine.structures import PixelData +from mmseg.apis import inference_model, init_model +from mmseg.structures import SegDataSample +from mmseg.utils import register_all_modules +from mmseg.visualization import SegLocalVisualizer + + +class Recorder: + """record the forward output feature map and save to data_buffer.""" + + def __init__(self) -> None: + self.data_buffer = list() + + def __enter__(self, ): + self.data_buffer = list() + + def record_data_hook(self, model: nn.Module, input: Type, output: Type): + self.data_buffer.append(output) + + def __exit__(self, *args, **kwargs): + pass + + +def visualize(args, model, recorder, result): + seg_visualizer = SegLocalVisualizer( + vis_backends=[dict(type='WandbVisBackend')], + save_dir='temp_dir', + alpha=0.5) + seg_visualizer.dataset_meta = dict( + classes=model.dataset_meta['classes'], + palette=model.dataset_meta['palette']) + + image = mmcv.imread(args.img, 'color') + + seg_visualizer.add_datasample( + name='predict', + image=image, + data_sample=result, + draw_gt=False, + draw_pred=True, + wait_time=0, + out_file=None, + show=False) + + # add feature map to wandb visualizer + for i in range(len(recorder.data_buffer)): + feature = recorder.data_buffer[i][0] # remove the batch
+ drawn_img = seg_visualizer.draw_featmap( + feature, image, channel_reduction='select_max') + seg_visualizer.add_image(f'feature_map{i}', drawn_img) + + if args.gt_mask: + sem_seg = mmcv.imread(args.gt_mask, 'unchanged') + sem_seg = torch.from_numpy(sem_seg) + gt_mask = dict(data=sem_seg) + gt_mask = PixelData(**gt_mask) + data_sample = SegDataSample() + data_sample.gt_sem_seg = gt_mask + + seg_visualizer.add_datasample( + name='gt_mask', + image=image, + data_sample=data_sample, + draw_gt=True, + draw_pred=False, + wait_time=0, + out_file=None, + show=False) + + seg_visualizer.add_image('image', image) + + +def main(): + parser = ArgumentParser( + description='Draw the Feature Map During Inference') + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('--gt_mask', default=None, help='Path of gt mask file') + parser.add_argument('--out-file', default=None, help='Path to output file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. 
In (0, 1] range.') + parser.add_argument( + '--title', default='result', help='The image identifier.') + args = parser.parse_args() + + register_all_modules() + + # build the model from a config file and a checkpoint file + model = init_model(args.config, args.checkpoint, device=args.device) + if args.device == 'cpu': + model = revert_sync_batchnorm(model) + + # show all named modules in the model and use them in the source list below + for name, module in model.named_modules(): + print(name) + + source = [ + 'decode_head.fusion.stages.0.query_project.activate', + 'decode_head.context.stages.0.key_project.activate', + 'decode_head.context.bottleneck.activate' + ] + source = dict.fromkeys(source) + + count = 0 + recorder = Recorder() + # register the forward hooks + for name, module in model.named_modules(): + if name in source: + count += 1 + module.register_forward_hook(recorder.record_data_hook) + if count == len(source): + break + + with recorder: + # test a single image, and record feature map to data_buffer + result = inference_model(model, args.img) + + visualize(args, model, recorder, result) + + +if __name__ == '__main__': + main() + +``` + +将上述代码保存为 feature_map_visual.py,在终端执行如下代码 + +```shell +python feature_map_visual.py ${图像} ${配置文件} ${检查点文件} [可选参数] +``` + +样例 + +```shell +python feature_map_visual.py \ +aachen_000000_000019_leftImg8bit.png \ +configs/ann/ann_r50-d8_4xb2-40k_cityscapes-512x1024.py \ +ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth \ +--gt_mask aachen_000000_000019_gtFine_labelTrainIds.png +``` + +可视化后的图像结果及其对应的 feature map 图像会出现在 wandb 账户中 + +
+ +
diff --git a/mmseg/__init__.py b/mmseg/__init__.py index 765ff4a042..9f171ccb0a 100644 --- a/mmseg/__init__.py +++ b/mmseg/__init__.py @@ -9,7 +9,7 @@ MMCV_MIN = '2.0.0rc4' MMCV_MAX = '2.1.0' -MMENGINE_MIN = '0.2.0' +MMENGINE_MIN = '0.5.0' MMENGINE_MAX = '1.0.0' diff --git a/mmseg/apis/__init__.py b/mmseg/apis/__init__.py index 9933b99b3c..d22dc3f0ad 100644 --- a/mmseg/apis/__init__.py +++ b/mmseg/apis/__init__.py @@ -1,4 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from .inference import inference_model, init_model, show_result_pyplot +from .mmseg_inferencer import MMSegInferencer -__all__ = ['init_model', 'inference_model', 'show_result_pyplot'] +__all__ = [ + 'init_model', 'inference_model', 'show_result_pyplot', 'MMSegInferencer' +] diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py index d1cc545598..4aadffc798 100644 --- a/mmseg/apis/inference.py +++ b/mmseg/apis/inference.py @@ -9,6 +9,7 @@ import torch from mmengine import Config from mmengine.dataset import Compose +from mmengine.registry import init_default_scope from mmengine.runner import load_checkpoint from mmengine.utils import mkdir_or_exist @@ -48,6 +49,8 @@ def init_model(config: Union[str, Path, Config], config.model.backbone.init_cfg = None config.model.pretrained = None config.model.train_cfg = None + init_default_scope(config.get('default_scope', 'mmseg')) + model = MODELS.build(config.model) if checkpoint is not None: checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') diff --git a/mmseg/apis/mmseg_inferencer.py b/mmseg/apis/mmseg_inferencer.py new file mode 100644 index 0000000000..cb387b10b3 --- /dev/null +++ b/mmseg/apis/mmseg_inferencer.py @@ -0,0 +1,361 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import os.path as osp
import warnings
from typing import List, Optional, Sequence, Union

import mmcv
import mmengine
import numpy as np
import torch
import torch.nn as nn
from mmcv.transforms import Compose
from mmengine.infer.infer import BaseInferencer, ModelType
from mmengine.model import revert_sync_batchnorm
from mmengine.registry import init_default_scope
from mmengine.runner.checkpoint import _load_checkpoint_to_model
from PIL import Image

from mmseg.structures import SegDataSample
from mmseg.utils import ConfigType, SampleList, get_classes, get_palette
from mmseg.visualization import SegLocalVisualizer

InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = Union[SegDataSample, SampleList]


class MMSegInferencer(BaseInferencer):
    """Semantic segmentation inferencer, provides inference and visualization
    interfaces. Note: MMEngine >= 0.5.0 is required.

    Args:
        model (str, optional): Path to the config file or the model name
            defined in the metafile, e.g.
            "fcn_r50-d8_4xb2-40k_cityscapes-512x1024", in which case the
            weights of the model will be downloaded automatically. If a
            config file is used, like
            "configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py", the
            `weights` should be defined.
        weights (str, optional): Path to the checkpoint. If it is not
            specified and model is a model name of metafile, the weights
            will be loaded from metafile. Defaults to None.
        classes (list, optional): Input classes for result rendering. As the
            prediction of a segmentation model is a segment map with label
            indices, `classes` is a list whose items correspond to the label
            indices. If classes is not defined, the visualizer will take
            `cityscapes` classes by default. Defaults to None.
        palette (list, optional): Input palette for result rendering, which
            is a list of colors corresponding to the classes. If palette is
            not defined, the visualizer will take `cityscapes` palette by
            default. Defaults to None.
        dataset_name (str, optional): Dataset name or alias. The visualizer
            will use the meta information of the dataset, i.e. classes and
            palette, but `classes` and `palette` have higher priority.
            Defaults to None.
        device (str, optional): Device to run inference. If None, the
            available device will be automatically used. Defaults to None.
        scope (str, optional): The scope of the model. Defaults to 'mmseg'.
    """  # noqa

    preprocess_kwargs: set = set()
    forward_kwargs: set = {'mode', 'out_dir'}
    visualize_kwargs: set = {'show', 'wait_time', 'img_out_dir', 'opacity'}
    postprocess_kwargs: set = {'pred_out_dir', 'return_datasample'}

    def __init__(self,
                 model: Union[ModelType, str],
                 weights: Optional[str] = None,
                 classes: Optional[Union[str, List]] = None,
                 palette: Optional[Union[str, List]] = None,
                 dataset_name: Optional[str] = None,
                 device: Optional[str] = None,
                 scope: Optional[str] = 'mmseg') -> None:
        # Global counters tracking the number of processed images, used for
        # naming of the output images.
        self.num_visualized_imgs = 0
        self.num_pred_imgs = 0
        init_default_scope(scope if scope else 'mmseg')
        super().__init__(
            model=model, weights=weights, device=device, scope=scope)

        # SyncBN cannot run without CUDA, so fall back to plain BN on CPU.
        if device == 'cpu' or not torch.cuda.is_available():
            self.model = revert_sync_batchnorm(self.model)

        assert isinstance(self.visualizer, SegLocalVisualizer)
        # Pass by keyword: ``set_dataset_meta`` takes
        # (classes, palette, dataset_name); the previous positional call
        # handed ``palette`` in as ``classes`` and vice versa.
        self.visualizer.set_dataset_meta(
            classes=classes, palette=palette, dataset_name=dataset_name)

    def _load_weights_to_model(self, model: nn.Module,
                               checkpoint: Optional[dict],
                               cfg: Optional[ConfigType]) -> None:
        """Load model weights and meta information from cfg and checkpoint.

        After this call ``model.dataset_meta`` is always a dict with the keys
        ``classes`` and ``palette``. When the checkpoint is missing or does
        not carry that information, the Cityscapes classes and palette are
        used and a warning is emitted.

        Args:
            model (nn.Module): Model to load weights and meta information.
            checkpoint (dict, optional): The loaded checkpoint.
            cfg (Config or ConfigDict, optional): The loaded config. Not
                used by this implementation.
        """

        if checkpoint is not None:
            _load_checkpoint_to_model(model, checkpoint)
            checkpoint_meta = checkpoint.get('meta', {})
            # save the dataset_meta in the model for convenience
            if 'dataset_meta' in checkpoint_meta:
                # mmsegmentation 1.x
                model.dataset_meta = {
                    'classes': checkpoint_meta['dataset_meta'].get('classes'),
                    'palette': checkpoint_meta['dataset_meta'].get('palette')
                }
            elif 'CLASSES' in checkpoint_meta:
                # mmsegmentation 0.x
                classes = checkpoint_meta['CLASSES']
                palette = checkpoint_meta.get('PALETTE', None)
                model.dataset_meta = {'classes': classes, 'palette': palette}
            else:
                warnings.warn(
                    'dataset_meta or class names are not saved in the '
                    'checkpoint\'s meta data, use classes of Cityscapes by '
                    'default.')
                model.dataset_meta = {
                    'classes': get_classes('cityscapes'),
                    'palette': get_palette('cityscapes')
                }
        else:
            warnings.warn('Checkpoint is not loaded, and the inference '
                          'result is calculated by the randomly initialized '
                          'model!')
            warnings.warn(
                'weights is None, use cityscapes classes by default.')
            model.dataset_meta = {
                'classes': get_classes('cityscapes'),
                'palette': get_palette('cityscapes')
            }

    def __call__(self,
                 inputs: InputsType,
                 return_datasamples: bool = False,
                 batch_size: int = 1,
                 show: bool = False,
                 wait_time: int = 0,
                 out_dir: str = '',
                 img_out_dir: str = 'vis',
                 pred_out_dir: str = 'pred',
                 **kwargs) -> dict:
        """Call the inferencer.

        Args:
            inputs (Union[list, str, np.ndarray]): Inputs for the inferencer.
            return_datasamples (bool): Whether to return results as
                :obj:`SegDataSample`. Defaults to False.
            batch_size (int): Batch size. Defaults to 1.
            show (bool): Whether to display the rendering color segmentation
                mask in a popup window. Defaults to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            out_dir (str): Output directory of inference results. If it is
                empty, nothing is saved regardless of `img_out_dir` and
                `pred_out_dir`. Defaults to ''.
            img_out_dir (str): Subdirectory of `out_dir`, used to save the
                rendered color segmentation mask, so `out_dir` must be
                defined if you would like to save it. Defaults to 'vis'.
            pred_out_dir (str): Subdirectory of `out_dir`, used to save the
                predicted mask file, so `out_dir` must be defined if you
                would like to save it. Defaults to 'pred'.

            **kwargs: Other keyword arguments passed to :meth:`preprocess`,
                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
                Each key in kwargs should be in the corresponding set of
                ``preprocess_kwargs``, ``forward_kwargs``,
                ``visualize_kwargs`` and ``postprocess_kwargs``.

        Returns:
            dict: Inference and visualization results.
        """

        # Sub-directories are only meaningful relative to ``out_dir``; an
        # empty string tells the downstream steps not to save anything.
        if out_dir != '':
            pred_out_dir = osp.join(out_dir, pred_out_dir)
            img_out_dir = osp.join(out_dir, img_out_dir)
        else:
            pred_out_dir = ''
            img_out_dir = ''

        return super().__call__(
            inputs=inputs,
            return_datasamples=return_datasamples,
            batch_size=batch_size,
            show=show,
            wait_time=wait_time,
            img_out_dir=img_out_dir,
            pred_out_dir=pred_out_dir,
            **kwargs)

    def visualize(self,
                  inputs: list,
                  preds: List[dict],
                  show: bool = False,
                  wait_time: int = 0,
                  img_out_dir: str = '',
                  opacity: float = 0.8) -> Optional[List[np.ndarray]]:
        """Visualize predictions.

        Args:
            inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`.
            preds (Any): Predictions of the model.
            show (bool): Whether to display the image in a popup window.
                Defaults to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            img_out_dir (str): Output directory of rendering prediction i.e.
                color segmentation mask. Defaults: ''
            opacity (int, float): The transparency of segmentation mask.
                Defaults to 0.8.

        Returns:
            List[np.ndarray] or None: Visualization results, or None when
            there is no visualizer or when neither showing nor saving was
            requested.

        Raises:
            ValueError: If an input is neither a ``str`` nor an
                ``np.ndarray``.
        """
        # Nothing to do without a visualizer, or when the caller asked
        # neither to display nor to save the rendering. (A second
        # ``visualizer is None`` check used to follow here but could never
        # be reached.)
        if self.visualizer is None or (not show and img_out_dir == ''):
            return None

        # NOTE(review): this re-applies the checkpoint's meta on every call,
        # which replaces whatever was set in ``__init__`` - confirm intended.
        self.visualizer.set_dataset_meta(**self.model.dataset_meta)
        self.visualizer.alpha = opacity

        results = []

        for single_input, pred in zip(inputs, preds):
            if isinstance(single_input, str):
                img_bytes = mmengine.fileio.get(single_input)
                img = mmcv.imfrombytes(img_bytes)
                # reverse the channel order of the decoded image
                img = img[:, :, ::-1]
                img_name = osp.basename(single_input)
            elif isinstance(single_input, np.ndarray):
                img = single_input.copy()
                # arrays have no file name, so number them sequentially
                img_num = str(self.num_visualized_imgs).zfill(8) + '_vis'
                img_name = f'{img_num}.jpg'
            else:
                raise ValueError('Unsupported input type:'
                                 f'{type(single_input)}')

            out_file = osp.join(img_out_dir, img_name) if img_out_dir != ''\
                else None

            self.visualizer.add_datasample(
                img_name,
                img,
                pred,
                show=show,
                wait_time=wait_time,
                draw_gt=False,
                draw_pred=True,
                out_file=out_file)
            results.append(self.visualizer.get_image())
            self.num_visualized_imgs += 1

        return results

    def postprocess(self,
                    preds: PredType,
                    visualization: List[np.ndarray],
                    return_datasample: bool = False,
                    pred_out_dir: str = '') -> dict:
        """Process the predictions and visualization results from ``forward``
        and ``visualize``.

        This method is responsible for the following tasks:

        1. Pack the predictions and visualization results and return them.
        2. Save the predictions, if it is needed.

        Args:
            preds (List[Dict]): Predictions of the model.
            visualization (List[np.ndarray]): The list of rendering color
                segmentation mask.
            return_datasample (bool): Whether to return results as
                datasamples. If True, ``preds`` is returned as is (or its
                only element when it has length 1) and nothing is saved.
                Defaults to False.
            pred_out_dir: Directory to save the inference results w/o
                visualization. If left as empty, no file will be saved.
                Defaults to ''.

        Returns:
            dict: Inference and visualization results with key
            ``predictions`` and ``visualization``

            - ``visualization (Any)``: Returned by :meth:`visualize`
            - ``predictions`` (List[np.ndarray], np.ndarray): Returned by
              :meth:`forward` and processed in :meth:`postprocess`.
              If ``return_datasample=False``, it will be the segmentation
              mask with label indices.
        """
        if return_datasample:
            if len(preds) == 1:
                return preds[0]
            else:
                return preds

        results_dict = {}

        results_dict['predictions'] = []
        results_dict['visualization'] = []

        for i, pred in enumerate(preds):
            pred_data = pred.pred_sem_seg.numpy().data[0]
            results_dict['predictions'].append(pred_data)
            if visualization is not None:
                vis = visualization[i]
                results_dict['visualization'].append(vis)
            if pred_out_dir != '':
                mmengine.mkdir_or_exist(pred_out_dir)
                img_name = str(self.num_pred_imgs).zfill(8) + '_pred.png'
                img_path = osp.join(pred_out_dir, img_name)
                output = Image.fromarray(pred_data.astype(np.uint8))
                output.save(img_path)
                self.num_pred_imgs += 1

        # A single input yields bare items instead of one-element lists.
        if len(results_dict['predictions']) == 1:
            results_dict['predictions'] = results_dict['predictions'][0]
            if visualization is not None:
                results_dict['visualization'] = \
                    results_dict['visualization'][0]
        return results_dict

    def _init_pipeline(self, cfg: ConfigType) -> Compose:
        """Initialize the test pipeline.

        Return a pipeline to handle various input data, such as ``str``,
        ``np.ndarray``. It is an abstract method in BaseInferencer, and
        should be implemented in subclasses.

        The returned pipeline will be used to process a single data.
        It will be used in :meth:`preprocess` like this:

        .. code-block:: python
            def preprocess(self, inputs, batch_size, **kwargs):
                ...
                dataset = map(self.pipeline, dataset)
                ...

        Raises:
            ValueError: If the test pipeline has no ``LoadImageFromFile``
                transform to replace.
        """
        pipeline_cfg = cfg.test_dataloader.dataset.pipeline
        # Loading annotations is also not applicable
        idx = self._get_transform_idx(pipeline_cfg, 'LoadAnnotations')
        if idx != -1:
            del pipeline_cfg[idx]
        load_img_idx = self._get_transform_idx(pipeline_cfg,
                                               'LoadImageFromFile')

        if load_img_idx == -1:
            raise ValueError(
                'LoadImageFromFile is not found in the test pipeline')
        # Swap the file loader for the one that accepts paths and arrays.
        pipeline_cfg[load_img_idx]['type'] = 'InferencerLoader'
        return Compose(pipeline_cfg)

    def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
        """Returns the index of the transform in a pipeline.

        If the transform is not found, returns -1.
        """
        for i, transform in enumerate(pipeline_cfg):
            if transform['type'] == name:
                return i
        return -1
+ backend_args (dict, Optional): Arguments to instantiate a file backend. See https://mmengine.readthedocs.io/en/latest/api/fileio.htm - for details. Defaults to ``dict(backend='local')`` + for details. Defaults to None. Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. """ METAINFO: dict = dict() - def __init__( - self, - ann_file: str = '', - img_suffix='.jpg', - seg_map_suffix='.png', - metainfo: Optional[dict] = None, - data_root: Optional[str] = None, - data_prefix: dict = dict(img_path='', seg_map_path=''), - filter_cfg: Optional[dict] = None, - indices: Optional[Union[int, Sequence[int]]] = None, - serialize_data: bool = True, - pipeline: List[Union[dict, Callable]] = [], - test_mode: bool = False, - lazy_init: bool = False, - max_refetch: int = 1000, - ignore_index: int = 255, - reduce_zero_label: bool = False, - backend_args: dict = dict(backend='local') - ) -> None: + def __init__(self, + ann_file: str = '', + img_suffix='.jpg', + seg_map_suffix='.png', + metainfo: Optional[dict] = None, + data_root: Optional[str] = None, + data_prefix: dict = dict(img_path='', seg_map_path=''), + filter_cfg: Optional[dict] = None, + indices: Optional[Union[int, Sequence[int]]] = None, + serialize_data: bool = True, + pipeline: List[Union[dict, Callable]] = [], + test_mode: bool = False, + lazy_init: bool = False, + max_refetch: int = 1000, + ignore_index: int = 255, + reduce_zero_label: bool = False, + backend_args: Optional[dict] = None) -> None: self.img_suffix = img_suffix self.seg_map_suffix = seg_map_suffix self.ignore_index = ignore_index self.reduce_zero_label = reduce_zero_label - self.backend_args = backend_args.copy() + self.backend_args = backend_args.copy() if backend_args else None self.data_root = data_root self.data_prefix = copy.copy(data_prefix) diff --git a/mmseg/datasets/refuge.py b/mmseg/datasets/refuge.py new file mode 100644 index 0000000000..4016a825a3 --- /dev/null +++ b/mmseg/datasets/refuge.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. 
import mmengine.fileio as fileio

from mmseg.registry import DATASETS
from .basesegdataset import BaseSegDataset


@DATASETS.register_module()
class REFUGEDataset(BaseSegDataset):
    """REFUGE dataset.

    In segmentation map annotation for REFUGE, 0 stands for background and
    is kept as a regular class next to the two foreground categories, so
    ``reduce_zero_label`` is fixed to False. The ``img_suffix`` is fixed to
    '.png' and ``seg_map_suffix`` is fixed to '.png'.
    """
    METAINFO = dict(
        # 'Optic Cup' previously carried a stray leading space.
        classes=('background', 'Optic Cup', 'Optic Disc'),
        palette=[[120, 120, 120], [6, 230, 230], [56, 59, 120]])

    def __init__(self, **kwargs) -> None:
        super().__init__(
            img_suffix='.png',
            seg_map_suffix='.png',
            reduce_zero_label=False,
            **kwargs)
        # Fail early with a readable message when the image directory is
        # missing.
        assert fileio.exists(
            self.data_prefix['img_path'],
            backend_args=self.backend_args), (
                f"Image path {self.data_prefix['img_path']} does not exist")
import warnings -from typing import Dict +from typing import Dict, Optional, Union import mmcv import mmengine.fileio as fileio @@ -56,14 +56,14 @@ class LoadAnnotations(MMCV_LoadAnnotations): Defaults to 'pillow'. backend_args (dict): Arguments to instantiate a file backend. See https://mmengine.readthedocs.io/en/latest/api/fileio.htm - for details. Defaults to ``dict(backend='local')`` + for details. Defaults to None. Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. """ def __init__( self, reduce_zero_label=None, - backend_args=dict(backend='local'), + backend_args=None, imdecode_backend='pillow', ) -> None: super().__init__( @@ -203,23 +203,21 @@ class LoadBiomedicalImageFromFile(BaseTransform): to_float32 (bool): Whether to convert the loaded image to a float32 numpy array. If set to False, the loaded image is an float64 array. Defaults to True. - backend_args (dict): Arguments to instantiate a file backend. + backend_args (dict, Optional): Arguments to instantiate a file backend. See https://mmengine.readthedocs.io/en/latest/api/fileio.htm - for details. Defaults to ``dict(backend='local')`` + for details. Defaults to None. Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. """ - def __init__( - self, - decode_backend: str = 'nifti', - to_xyz: bool = False, - to_float32: bool = True, - backend_args: dict = dict(backend='local') - ) -> None: + def __init__(self, + decode_backend: str = 'nifti', + to_xyz: bool = False, + to_float32: bool = True, + backend_args: Optional[dict] = None) -> None: self.decode_backend = decode_backend self.to_xyz = to_xyz self.to_float32 = to_float32 - self.backend_args = backend_args.copy() + self.backend_args = backend_args.copy() if backend_args else None def transform(self, results: Dict) -> Dict: """Functions to load image. @@ -295,24 +293,22 @@ class LoadBiomedicalAnnotation(BaseTransform): to_float32 (bool): Whether to convert the loaded seg map to a float32 numpy array. If set to False, the loaded image is an float64 array. 
Defaults to True. - backend_args (dict): Arguments to instantiate a file backend. + backend_args (dict, Optional): Arguments to instantiate a file backend. See :class:`mmengine.fileio` for details. - Defaults to ``dict(backend='local')``. + Defaults to None. Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. """ - def __init__( - self, - decode_backend: str = 'nifti', - to_xyz: bool = False, - to_float32: bool = True, - backend_args: dict = dict(backend='local') - ) -> None: + def __init__(self, + decode_backend: str = 'nifti', + to_xyz: bool = False, + to_float32: bool = True, + backend_args: Optional[dict] = None) -> None: super().__init__() self.decode_backend = decode_backend self.to_xyz = to_xyz self.to_float32 = to_float32 - self.backend_args = backend_args.copy() + self.backend_args = backend_args.copy() if backend_args else None def transform(self, results: Dict) -> Dict: """Functions to load image. @@ -384,23 +380,21 @@ class LoadBiomedicalData(BaseTransform): backend is 'nifti'. Defaults to 'nifti'. to_xyz (bool): Whether transpose data from Z, Y, X to X, Y, Z. Defaults to False. - backend_args (dict): Arguments to instantiate a file backend. + backend_args (dict, Optional): Arguments to instantiate a file backend. See https://mmengine.readthedocs.io/en/latest/api/fileio.htm - for details. Defaults to ``dict(backend='local')`` + for details. Defaults to None. Notes: mmcv>=2.0.0rc4, mmengine>=0.2.0 required. 
@TRANSFORMS.register_module()
class InferencerLoader(BaseTransform):
    """Load an image given as a path, an array or a result dict.

    Dispatches to a ``LoadImageFromFile`` transform when only a path is
    available and to a ``LoadImageFromNDArray`` transform when the image is
    already present under the ``img`` key. Can be used when loading images
    from a webcam.

    Required Keys:

    - img or img_path

    Modified Keys:

    - img
    - img_path
    - img_shape
    - ori_shape

    Args:
        **kwargs: Keyword arguments forwarded unchanged to both
            ``LoadImageFromFile`` and ``LoadImageFromNDArray`` when they are
            built, so they must be accepted by both.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__()
        self.from_file = TRANSFORMS.build(
            dict(type='LoadImageFromFile', **kwargs))
        self.from_ndarray = TRANSFORMS.build(
            dict(type='LoadImageFromNDArray', **kwargs))

    def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict:
        """Load the image and add its meta information.

        Args:
            single_input (str, np.ndarray or dict): An image path, an image
                array, or a result dict. A dict is passed on as is.

        Returns:
            dict: The dict returned by the selected loading transform.

        Raises:
            NotImplementedError: If ``single_input`` is none of the
                supported types.
        """
        if isinstance(single_input, str):
            inputs = dict(img_path=single_input)
        elif isinstance(single_input, np.ndarray):
            inputs = dict(img=single_input)
        elif isinstance(single_input, dict):
            inputs = single_input
        else:
            raise NotImplementedError(
                f'Unsupported input type: {type(single_input)}')

        # An in-memory image takes precedence over a path.
        if 'img' in inputs:
            return self.from_ndarray(inputs)
        return self.from_file(inputs)
""" @@ -41,7 +41,7 @@ def __init__(self, interval: int = 50, show: bool = False, wait_time: float = 0., - backend_args: dict = dict(backend='local')): + backend_args: Optional[dict] = None): self._visualizer: SegLocalVisualizer = \ SegLocalVisualizer.get_current_instance() self.interval = interval @@ -55,7 +55,7 @@ def __init__(self, 'needs to be excluded.') self.wait_time = wait_time - self.backend_args = backend_args.copy() + self.backend_args = backend_args.copy() if backend_args else None self.draw = draw if not self.draw: warnings.warn('The draw is False, it means that the ' diff --git a/mmseg/models/segmentors/cascade_encoder_decoder.py b/mmseg/models/segmentors/cascade_encoder_decoder.py index c932b43069..0184a3533a 100644 --- a/mmseg/models/segmentors/cascade_encoder_decoder.py +++ b/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -68,6 +68,7 @@ def _init_decode_head(self, decode_head: ConfigType) -> None: self.decode_head.append(MODELS.build(decode_head[i])) self.align_corners = self.decode_head[-1].align_corners self.num_classes = self.decode_head[-1].num_classes + self.out_channels = self.decode_head[-1].out_channels def encode_decode(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: diff --git a/mmseg/registry/__init__.py b/mmseg/registry/__init__.py index c646b7e5ac..ee514d1a2a 100644 --- a/mmseg/registry/__init__.py +++ b/mmseg/registry/__init__.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .registry import (DATA_SAMPLERS, DATASETS, HOOKS, LOOPS, METRICS, - MODEL_WRAPPERS, MODELS, OPTIM_WRAPPER_CONSTRUCTORS, - OPTIMIZERS, PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, - RUNNERS, TASK_UTILS, TRANSFORMS, VISBACKENDS, - VISUALIZERS, WEIGHT_INITIALIZERS) +from .registry import (DATA_SAMPLERS, DATASETS, EVALUATOR, HOOKS, INFERENCERS, + LOG_PROCESSORS, LOOPS, METRICS, MODEL_WRAPPERS, MODELS, + OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, OPTIMIZERS, + PARAM_SCHEDULERS, RUNNER_CONSTRUCTORS, RUNNERS, + TASK_UTILS, TRANSFORMS, VISBACKENDS, VISUALIZERS, + WEIGHT_INITIALIZERS) __all__ = [ - 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'HOOKS', 'DATASETS', 'DATA_SAMPLERS', - 'TRANSFORMS', 'MODELS', 'WEIGHT_INITIALIZERS', 'OPTIMIZERS', - 'OPTIM_WRAPPER_CONSTRUCTORS', 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', - 'MODEL_WRAPPERS', 'LOOPS', 'VISBACKENDS', 'VISUALIZERS' + 'HOOKS', 'DATASETS', 'DATA_SAMPLERS', 'TRANSFORMS', 'MODELS', + 'WEIGHT_INITIALIZERS', 'OPTIMIZERS', 'OPTIM_WRAPPER_CONSTRUCTORS', + 'TASK_UTILS', 'PARAM_SCHEDULERS', 'METRICS', 'MODEL_WRAPPERS', + 'VISBACKENDS', 'VISUALIZERS', 'RUNNERS', 'RUNNER_CONSTRUCTORS', 'LOOPS', + 'EVALUATOR', 'LOG_PROCESSORS', 'OPTIM_WRAPPERS', 'INFERENCERS' ] diff --git a/mmseg/registry/registry.py b/mmseg/registry/registry.py index 5c9977ab8d..32684e758f 100644 --- a/mmseg/registry/registry.py +++ b/mmseg/registry/registry.py @@ -10,6 +10,7 @@ from mmengine.registry import DATASETS as MMENGINE_DATASETS from mmengine.registry import EVALUATOR as MMENGINE_EVALUATOR from mmengine.registry import HOOKS as MMENGINE_HOOKS +from mmengine.registry import INFERENCERS as MMENGINE_INFERENCERS from mmengine.registry import LOG_PROCESSORS as MMENGINE_LOG_PROCESSORS from mmengine.registry import LOOPS as MMENGINE_LOOPS from mmengine.registry import METRICS as MMENGINE_METRICS @@ -39,45 +40,82 @@ # manage all kinds of loops like `EpochBasedTrainLoop` LOOPS = Registry('loop', parent=MMENGINE_LOOPS) # manage all kinds of hooks like `CheckpointHook` 
-HOOKS = Registry('hook', parent=MMENGINE_HOOKS) +HOOKS = Registry( + 'hook', parent=MMENGINE_HOOKS, locations=['mmseg.engine.hooks']) # manage data-related modules -DATASETS = Registry('dataset', parent=MMENGINE_DATASETS) -DATA_SAMPLERS = Registry('data sampler', parent=MMENGINE_DATA_SAMPLERS) -TRANSFORMS = Registry('transform', parent=MMENGINE_TRANSFORMS) +DATASETS = Registry( + 'dataset', parent=MMENGINE_DATASETS, locations=['mmseg.datasets']) +DATA_SAMPLERS = Registry( + 'data sampler', + parent=MMENGINE_DATA_SAMPLERS, + locations=['mmseg.datasets.samplers']) +TRANSFORMS = Registry( + 'transform', + parent=MMENGINE_TRANSFORMS, + locations=['mmseg.datasets.transforms']) # mangage all kinds of modules inheriting `nn.Module` -MODELS = Registry('model', parent=MMENGINE_MODELS) +MODELS = Registry('model', parent=MMENGINE_MODELS, locations=['mmseg.models']) # mangage all kinds of model wrappers like 'MMDistributedDataParallel' -MODEL_WRAPPERS = Registry('model_wrapper', parent=MMENGINE_MODEL_WRAPPERS) +MODEL_WRAPPERS = Registry( + 'model_wrapper', + parent=MMENGINE_MODEL_WRAPPERS, + locations=['mmseg.models']) # mangage all kinds of weight initialization modules like `Uniform` WEIGHT_INITIALIZERS = Registry( - 'weight initializer', parent=MMENGINE_WEIGHT_INITIALIZERS) + 'weight initializer', + parent=MMENGINE_WEIGHT_INITIALIZERS, + locations=['mmseg.models']) # mangage all kinds of optimizers like `SGD` and `Adam` -OPTIMIZERS = Registry('optimizer', parent=MMENGINE_OPTIMIZERS) +OPTIMIZERS = Registry( + 'optimizer', + parent=MMENGINE_OPTIMIZERS, + locations=['mmseg.engine.optimizers']) # manage optimizer wrapper -OPTIM_WRAPPERS = Registry('optim_wrapper', parent=MMENGINE_OPTIM_WRAPPERS) +OPTIM_WRAPPERS = Registry( + 'optim_wrapper', + parent=MMENGINE_OPTIM_WRAPPERS, + locations=['mmseg.engine.optimizers']) # manage constructors that customize the optimization hyperparameters. 
OPTIM_WRAPPER_CONSTRUCTORS = Registry( 'optimizer wrapper constructor', - parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS) + parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS, + locations=['mmseg.engine.optimizers']) # mangage all kinds of parameter schedulers like `MultiStepLR` PARAM_SCHEDULERS = Registry( - 'parameter scheduler', parent=MMENGINE_PARAM_SCHEDULERS) + 'parameter scheduler', + parent=MMENGINE_PARAM_SCHEDULERS, + locations=['mmseg.engine.schedulers']) # manage all kinds of metrics -METRICS = Registry('metric', parent=MMENGINE_METRICS) +METRICS = Registry( + 'metric', parent=MMENGINE_METRICS, locations=['mmseg.evaluation']) # manage evaluator -EVALUATOR = Registry('evaluator', parent=MMENGINE_EVALUATOR) +EVALUATOR = Registry( + 'evaluator', parent=MMENGINE_EVALUATOR, locations=['mmseg.evaluation']) # manage task-specific modules like ohem pixel sampler -TASK_UTILS = Registry('task util', parent=MMENGINE_TASK_UTILS) +TASK_UTILS = Registry( + 'task util', parent=MMENGINE_TASK_UTILS, locations=['mmseg.models']) # manage visualizer -VISUALIZERS = Registry('visualizer', parent=MMENGINE_VISUALIZERS) +VISUALIZERS = Registry( + 'visualizer', + parent=MMENGINE_VISUALIZERS, + locations=['mmseg.visualization']) # manage visualizer backend -VISBACKENDS = Registry('vis_backend', parent=MMENGINE_VISBACKENDS) +VISBACKENDS = Registry( + 'vis_backend', + parent=MMENGINE_VISBACKENDS, + locations=['mmseg.visualization']) # manage logprocessor -LOG_PROCESSORS = Registry('log_processor', parent=MMENGINE_LOG_PROCESSORS) +LOG_PROCESSORS = Registry( + 'log_processor', + parent=MMENGINE_LOG_PROCESSORS, + locations=['mmseg.visualization']) + +# manage inferencer +INFERENCERS = Registry('inferencer', parent=MMENGINE_INFERENCERS) diff --git a/mmseg/version.py b/mmseg/version.py index 10ceca8120..ef8e391a29 100644 --- a/mmseg/version.py +++ b/mmseg/version.py @@ -1,6 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. 
-__version__ = '1.0.0rc5' +__version__ = '1.0.0rc6' def parse_version_info(version_str): diff --git a/mmseg/visualization/local_visualizer.py b/mmseg/visualization/local_visualizer.py index 27443f2c57..d11ad79c81 100644 --- a/mmseg/visualization/local_visualizer.py +++ b/mmseg/visualization/local_visualizer.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional import mmcv import numpy as np @@ -24,6 +24,17 @@ class SegLocalVisualizer(Visualizer): Defaults to None. save_dir (str, optional): Save file dir for all storage backends. If it is None, the backend storage will not save any data. + classes (list, optional): Input classes for result rendering, as the + prediction of segmentation model is a segment map with label + indices, `classes` is a list which includes items responding to the + label indices. If classes is not defined, visualizer will take + `cityscapes` classes by default. Defaults to None. + palette (list, optional): Input palette for result rendering, which is + a list of color palette responding to the classes. Defaults to None. + dataset_name (str, optional): `Dataset name or alias `_ + visulizer will use the meta information of the dataset i.e. classes + and palette, but the `classes` and `palette` have higher priority. + Defaults to None. alpha (int, float): The transparency of segmentation mask. Defaults to 0.8. @@ -49,43 +60,40 @@ class SegLocalVisualizer(Visualizer): >>> seg_local_visualizer.add_datasample( ... 'visualizer_example', image, ... 
def __init__(self,
             name: str = 'visualizer',
             image: Optional[np.ndarray] = None,
             vis_backends: Optional[Dict] = None,
             save_dir: Optional[str] = None,
             classes: Optional[List] = None,
             palette: Optional[List] = None,
             dataset_name: Optional[str] = None,
             alpha: float = 0.8,
             **kwargs):
    """Create the visualizer and set its dataset meta information.

    Args:
        name (str): Name of the instance. Defaults to 'visualizer'.
        image (np.ndarray, optional): The origin image to draw.
            Defaults to None.
        vis_backends (dict, optional): Visual backend config.
            Defaults to None.
        save_dir (str, optional): Save file dir for all storage backends.
            Defaults to None.
        classes (list, optional): Class names for result rendering.
            Defaults to None.
        palette (list, optional): Colors corresponding to the classes.
            Defaults to None.
        dataset_name (str, optional): Dataset name or alias used to look up
            classes and palette that were not given. Defaults to None.
        alpha (int, float): The transparency of segmentation mask.
            Defaults to 0.8.
        **kwargs: Extra arguments forwarded to the parent constructor.
    """
    super().__init__(name, image, vis_backends, save_dir, **kwargs)
    self.alpha: float = alpha
    # Pass by keyword: ``set_dataset_meta`` takes
    # (classes, palette, dataset_name); the previous positional call handed
    # ``palette`` in as ``classes`` and vice versa.
    self.set_dataset_meta(
        classes=classes, palette=palette, dataset_name=dataset_name)
+ palette (list, optional): Input palette for result rendering, which + is a list of color palette responding to the classes. + Defaults to None. Returns: np.ndarray: the drawn image which channel is RGB. @@ -109,6 +117,38 @@ def _draw_sem_seg(self, image: np.ndarray, sem_seg: PixelData, return self.get_image() + def set_dataset_meta(self, + classes: Optional[List] = None, + palette: Optional[List] = None, + dataset_name: Optional[str] = None) -> None: + """Set meta information to visualizer. + + Args: + classes (list, optional): Input classes for result rendering, as + the prediction of segmentation model is a segment map with + label indices, `classes` is a list which includes items + responding to the label indices. If classes is not defined, + visualizer will take `cityscapes` classes by default. + Defaults to None. + palette (list, optional): Input palette for result rendering, which + is a list of color palette responding to the classes. + Defaults to None. + dataset_name (str, optional): `Dataset name or alias `_ + visualizer will use the meta information of the dataset i.e. + classes and palette, but the `classes` and `palette` have + higher priority. Defaults to None. + """ # noqa + # Set default value. When calling + # `SegLocalVisualizer().dataset_meta=xxx`, + # it will override the default value. 
+ if dataset_name is None: + dataset_name = 'cityscapes' + classes = classes if classes else get_classes(dataset_name) + palette = palette if palette else get_palette(dataset_name) + assert len(classes) == len( + palette), 'The length of classes should be equal to palette' + self.dataset_meta: dict = {'classes': classes, 'palette': palette} + @master_only def add_datasample( self, @@ -186,6 +226,6 @@ def add_datasample( self.show(drawn_img, win_name=name, wait_time=wait_time) if out_file is not None: - mmcv.imwrite(drawn_img, out_file) + mmcv.imwrite(mmcv.bgr2rgb(drawn_img), out_file) else: self.add_image(name, drawn_img, step) diff --git a/projects/README.md b/projects/README.md index 40d515eda3..5482c479aa 100644 --- a/projects/README.md +++ b/projects/README.md @@ -1,9 +1,19 @@ # Projects -Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here. +The OpenMMLab ecosystem can only grow through the contributions of the community. +Everyone is welcome to post their implementation of any great ideas in this folder! If you wish to start your own project, please go through the [example project](example_project/) for the best practice. For common questions about projects, please read our [faq](faq.md). -And that's why we have this `Projects/` folder now, where some experimental features, frameworks and models are placed, only needed to satisfy the minimum requirement on the code quality, and can be used as standalone libraries. Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html#best-practices). +## External Projects -Everyone is welcome to post their implementation of any great ideas in this folder! 
If you wish to start your own project, please go through the [example project](example_project/) for the best practice. +There are also selected external projects released in the community that use MMSegmentation: -Note: The core maintainers of MMSegmentation only ensure the results are reproducible and the code quality meets its claim at the time each project was submitted, but they may not be responsible for future maintenance. The original authors take responsibility for maintaining their own projects. +- [SegNeXt: Rethinking Convolutional Attention Design for Semantic Segmentation](https://github.com/visual-attention-network/segnext) +- [Vision Transformer Adapter for Dense Predictions](https://github.com/czczup/ViT-Adapter) +- [UniFormer: Unifying Convolution and Self-attention for Visual Recognition](https://github.com/Sense-X/UniFormer) +- [Multi-Scale High-Resolution Vision Transformer for Semantic Segmentation](https://github.com/facebookresearch/HRViT) +- [ViTAE: Vision Transformer Advanced by Exploring Intrinsic Inductive Bias](https://github.com/ViTAE-Transformer/ViTAE-Transformer) +- [DAFormer: Improving Network Architectures and Training Strategies for Domain-Adaptive Semantic Segmentation](https://github.com/lhoyer/DAFormer) +- [MPViT : Multi-Path Vision Transformer for Dense Prediction](https://github.com/youngwanLEE/MPViT) +- [TopFormer: Token Pyramid Transformer for Mobile Semantic Segmentation](https://github.com/hustvl/TopFormer) + +Note: These projects are supported and maintained by their own contributors. The core maintainers of MMSegmentation only ensure the results are reproducible and the code quality meets its claim at the time each project was submitted, but they may not be responsible for future maintenance. 
diff --git a/projects/example_project/README.md b/projects/example_project/README.md index 27ca5d4e2a..4338b8acac 100644 --- a/projects/example_project/README.md +++ b/projects/example_project/README.md @@ -1,20 +1,26 @@ # Dummy ResNet Wrapper -This is an example README for community `projects/`. We have provided detailed explanations for each field in the form of html comments, which are visible when you read the source of this README file. If you wish to submit your project to our main repository, then all the fields in this README are mandatory for others to understand what you have achieved in this implementation. For more details, read our [contribution guide](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/.github/CONTRIBUTING.md) or approach us in [Discussions](https://github.com/open-mmlab/mmsegmentation/discussions). +> A README.md template for releasing a project. +> +> All the fields in this README are **mandatory** for others to understand what you have achieved in this implementation. +> Please read our [Projects FAQ](../faq.md) if you still feel unclear about the requirements, or raise an [issue](https://github.com/open-mmlab/mmsegmentation/issues) to us! ## Description - +Author: @xxx. This project implements a dummy ResNet wrapper, which literally does nothing new but prints "hello world" during initialization. ## Usage - +> For a typical model, this section should contain the commands for training and testing. +> You are also suggested to dump your environment specification to env.yml by `conda env export > env.yml`. ### Prerequisites @@ -47,9 +53,8 @@ mim train mmsegmentation configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.p mim test mmsegmentation configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py --work-dir work_dirs/dummy_resnet --checkpoint ${CHECKPOINT_PATH} ``` - +> List the results as usually done in other model's README. 
\[Example\](https://github.com/open-mmlab/mmsegmentation/tree/dev-1.x/configs/fcn#results-and-models +> You should claim whether this is based on the pre-trained weights, which are converted from the official release; or it's a reproduced result obtained from retraining the model in this project | Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | | ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -57,7 +62,7 @@ You should claim whether this is based on the pre-trained weights, which are con ## Citation - +> You may remove this section if not applicable. ```bibtex @misc{mmseg2020, @@ -72,58 +77,58 @@ You should claim whether this is based on the pre-trained weights, which are con Here is a checklist illustrating a usual development workflow of a successful project, and also serves as an overview of this project's progress. - +> A project does not necessarily have to be finished in a single PR, but it's essential for the project to at least reach the first milestone in its very first PR. - [ ] Milestone 1: PR-ready, and acceptable to be one of the `projects/`. - [ ] Finish the code - +> The code's design shall follow existing interfaces and convention. For example, each model component should be registered into `mmseg.registry.MODELS` and configurable via a config file. - - [ ] Basic docstrings & proper citation +- [ ] Basic docstrings & proper citation - +> Each major object should contain a docstring, describing its functionality and arguments. 
If you have adapted the code from other open-source projects, don't forget to cite the source project in docstring and make sure your behavior is not against its license. Typically, we do not accept any code snippet under GPL license. [A Short Guide to Open Source Licenses](https://medium.com/nationwide-technology/a-short-guide-to-open-source-licenses-cf5b1c329edd) - - [ ] Test-time correctness +- [ ] Test-time correctness - +> If you are reproducing the result from a paper, make sure your model's inference-time performance matches that in the original paper. The weights usually could be obtained by simply renaming the keys in the official pre-trained weights. This test could be skipped though, if you are able to prove the training-time correctness and check the second milestone. - - [ ] A full README +- [ ] A full README - +> As this template does. - [ ] Milestone 2: Indicates a successful model implementation. - [ ] Training-time correctness - +> If you are reproducing the result from a paper, checking this item means that you should have trained your model from scratch based on the original paper's specification and verified that the final result matches the report within a minor error range. - [ ] Milestone 3: Good to be a part of our core package! - [ ] Type hints and docstrings - +> Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/mmseg/utils/io.py#L9) - - [ ] Unit tests +- [ ] Unit tests - +> Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/tests/test_utils/test_io.py#L14) - - [ ] Code polishing +- [ ] Code polishing - +> Refactor your code according to reviewer's comment. - - [ ] Metafile.yml +- [ ] Metafile.yml - +> It will be parsed by MIM and Inferencer. 
[Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/fcn.yml) - [ ] Move your modules into the core package following the codebase's file hierarchy structure. - +> In particular, you may have to refactor this README into a standard one. [Example](https://github.com/open-mmlab/mmsegmentation/blob/dev-1.x/configs/fcn/README.md) - [ ] Refactor your modules into the core package following the codebase's file hierarchy structure. diff --git a/projects/example_project/configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py b/projects/example_project/configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py index b0ec67b691..43015364eb 100644 --- a/projects/example_project/configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py +++ b/projects/example_project/configs/fcn_dummy-r50-d8_4xb2-40k_cityscapes-512x1024.py @@ -1,6 +1,6 @@ -_base_ = ['../../../configs/fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py'] +_base_ = ['mmseg::fcn/fcn_r50-d8_4xb2-40k_cityscapes-512x1024.py'] -custom_imports = dict(imports=['projects.example_project.dummy']) +custom_imports = dict(imports=['dummy']) crop_size = (512, 1024) data_preprocessor = dict(size=crop_size) diff --git a/projects/faq.md b/projects/faq.md new file mode 100644 index 0000000000..724c1cf6a5 --- /dev/null +++ b/projects/faq.md @@ -0,0 +1,19 @@ +Q1: Why set up `projects/` folder? + +Implementing new models and features into OpenMMLab's algorithm libraries could be troublesome due to the rigorous requirements on code quality, which could hinder the fast iteration of SOTA models and might discourage our members from sharing their latest outcomes here. And that's why we have this `projects/` folder now, where some experimental features, frameworks and models are placed, only needed to satisfy the minimum requirement on the code quality, and can be used as standalone libraries. 
Users are welcome to use them if they [use MMSegmentation from source](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html#best-practices). + +Q2: Why should there be a checklist for a project? + +This checklist is crucial not only for this project's developers but the entire community, since there might be some other contributors joining this project and deciding their starting point from this list. It also helps maintainers accurately estimate time and effort on further code polishing, if needed. + +Q3: What kind of PR will be merged? + +Reaching the first milestone means that this project satisfies the minimum requirement of being merged into 'projects/'. That is, the very first PR of a project must have all the terms in the first milestone checked. We do not have any extra requirements on the project's following PRs, so they can be a minor bug fix or update, and do not have to achieve one milestone at once. But keep in mind that this project is only eligible to become a part of the core package upon attaining the last milestone. + +Q4: Compared to other models in the core packages, why do the model implementations in projects have different training/testing commands? + +Projects are organized independently from the core package, and therefore their modules cannot be directly imported by train.py and test.py. Each model implementation in projects should either use `mim` for training/testing as suggested in the example project or provide a custom train.py/test.py. + +Q5: How to debug a project with a debugger? + +Debugger makes our lives easier, but using it becomes a bit tricky if we have to train/test a model via `mim`. The way to circumvent that is that we can take advantage of relative path to import these modules. 
Assuming that we are developing a project X and the core modules are placed under `projects/X/modules`, then simply adding `custom_imports = dict(imports='projects.X.modules')` to the config allows us to debug from usual entrypoints (e.g. `tools/train.py`) from the root directory of the algorithm library. Just don't forget to remove 'projects.X' before project publishment. diff --git a/projects/HieraSeg/README.md b/projects/hssn/README.md similarity index 82% rename from projects/HieraSeg/README.md rename to projects/hssn/README.md index 5519ec6916..c2a74c69f9 100644 --- a/projects/HieraSeg/README.md +++ b/projects/hssn/README.md @@ -1,12 +1,10 @@ -# HieraSeg - -Support `Deep Hierarchical Semantic Segmentation` interface on `cityscapes` +# HSSN ## Description Author: AI-Tianlong -This project implements `HieraSeg` inference in the `cityscapes` dataset +This project implements `Deep Hierarchical Semantic Segmentation` inference on `cityscapes` dataset ## Usage @@ -14,17 +12,17 @@ This project implements `HieraSeg` inference in the `cityscapes` dataset - Python 3.8 - PyTorch 1.6 or higher -- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) v1.0.0rc3 -- mmcv v2.0.0rc3 -- mmengine +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) v1.0.0rc5 +- mmcv v2.0.0rc4 +- mmengine >=0.4.0 ### Dataset preparing -preparing `cityscapes` dataset like this [structure](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) +Preparing `cityscapes` dataset following this [Dataset Preparing Guide](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets) ### Testing commands -please put [`hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth`](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) to `mmsegmentation/checkpoints` +Please put 
[`hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth`](https://download.openmmlab.com/mmsegmentation/v0.5/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth) to `mmsegmentation/checkpoints` #### Multi-GPUs Test @@ -36,7 +34,7 @@ bash tools/dist_test.sh [configs] [model weights] [number of gpu] --tta #### Example ```shell -bash tools/dist_test.sh projects/HieraSeg_project/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py checkpoints/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth 2 --tta +bash tools/dist_test.sh projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py checkpoints/hieraseg_deeplabv3plus_r101-d8_4xb2-80k_cityscapes-512x1024_20230112_125023-bc59a3d1.pth 2 --tta ``` ## Results diff --git a/projects/HieraSeg/configs/_base_/datasets/cityscapes.py b/projects/hssn/configs/_base_/datasets/cityscapes.py similarity index 100% rename from projects/HieraSeg/configs/_base_/datasets/cityscapes.py rename to projects/hssn/configs/_base_/datasets/cityscapes.py diff --git a/projects/HieraSeg/configs/_base_/default_runtime.py b/projects/hssn/configs/_base_/default_runtime.py similarity index 100% rename from projects/HieraSeg/configs/_base_/default_runtime.py rename to projects/hssn/configs/_base_/default_runtime.py diff --git a/projects/HieraSeg/configs/_base_/models/deeplabv3plus_r50-d8_vd_contrast.py b/projects/hssn/configs/_base_/models/deeplabv3plus_r50-d8_vd_contrast.py similarity index 100% rename from projects/HieraSeg/configs/_base_/models/deeplabv3plus_r50-d8_vd_contrast.py rename to projects/hssn/configs/_base_/models/deeplabv3plus_r50-d8_vd_contrast.py diff --git a/projects/HieraSeg/configs/_base_/schedules/schedule_80k.py b/projects/hssn/configs/_base_/schedules/schedule_80k.py similarity index 100% rename from projects/HieraSeg/configs/_base_/schedules/schedule_80k.py 
rename to projects/hssn/configs/_base_/schedules/schedule_80k.py diff --git a/projects/HieraSeg/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py b/projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py similarity index 82% rename from projects/HieraSeg/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py rename to projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py index 0d02bef5dc..8f04a2d656 100644 --- a/projects/HieraSeg/configs/hieraseg/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py +++ b/projects/hssn/configs/hssn/hieraseg_deeplabv3plus_r101-d8_4xb2-80l_cityscapes-512x1024.py @@ -5,8 +5,8 @@ ] custom_imports = dict(imports=[ - 'projects.HieraSeg.decode_head.sep_aspp_contrast_head', - 'projects.HieraSeg.losses.hiera_triplet_loss_cityscape' + 'projects.hssn.decode_head.sep_aspp_contrast_head', + 'projects.hssn.losses.hiera_triplet_loss_cityscape' ]) model = dict( diff --git a/projects/HieraSeg/decode_head/__init__.py b/projects/hssn/decode_head/__init__.py similarity index 100% rename from projects/HieraSeg/decode_head/__init__.py rename to projects/hssn/decode_head/__init__.py diff --git a/projects/HieraSeg/decode_head/sep_aspp_contrast_head.py b/projects/hssn/decode_head/sep_aspp_contrast_head.py similarity index 76% rename from projects/HieraSeg/decode_head/sep_aspp_contrast_head.py rename to projects/hssn/decode_head/sep_aspp_contrast_head.py index 75f67e7457..d1d087362c 100644 --- a/projects/HieraSeg/decode_head/sep_aspp_contrast_head.py +++ b/projects/hssn/decode_head/sep_aspp_contrast_head.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import List +from typing import List, Tuple import torch import torch.nn as nn @@ -10,6 +10,7 @@ from mmseg.models.losses import accuracy from mmseg.models.utils import resize from mmseg.registry import MODELS +from mmseg.utils import SampleList class ProjectionHead(nn.Module): @@ -61,34 +62,16 @@ def __init__(self, proj: str = 'convmlp', **kwargs): dim_in=2048, norm_cfg=self.norm_cfg, proj=proj) self.register_buffer('step', torch.zeros(1)) - def forward(self, inputs): + def forward(self, inputs) -> Tuple[Tensor]: """Forward function.""" + output = super().forward(inputs) + self.step += 1 embedding = self.proj_head(inputs[-1]) - x = self._transform_inputs(inputs) - aspp_outs = [ - resize( - self.image_pool(x), - size=x.size()[2:], - mode='bilinear', - align_corners=self.align_corners) - ] - aspp_outs.extend(self.aspp_modules(x)) - aspp_outs = torch.cat(aspp_outs, dim=1) - output = self.bottleneck(aspp_outs) - if self.c1_bottleneck is not None: - c1_output = self.c1_bottleneck(inputs[0]) - output = resize( - input=output, - size=c1_output.shape[2:], - mode='bilinear', - align_corners=self.align_corners) - output = torch.cat([output, c1_output], dim=1) - output = self.sep_bottleneck(output) - output = self.cls_seg(output) + return output, embedding - def predict_by_feat(self, seg_logits: Tensor, + def predict_by_feat(self, seg_logits: Tuple[Tensor], batch_img_metas: List[dict]) -> Tensor: """Transform a batch of output seg_logits to the input shape. @@ -100,12 +83,13 @@ def predict_by_feat(self, seg_logits: Tensor, Returns: Tensor: Outputs segmentation logits map. """ - # HieraSeg decode_head output is: (out, embedding) :tuple, + # HSSN decode_head output is: (out, embedding): tuple # only need 'out' here. 
if isinstance(seg_logits, tuple): seg_logit = seg_logits[0] - if seg_logit.size(1) == 26: + if seg_logit.size(1) == 26: # For cityscapes dataset,19 + 7 + hiera_num_classes = 7 seg_logit[:, 0:2] += seg_logit[:, -7] seg_logit[:, 2:5] += seg_logit[:, -6] seg_logit[:, 5:8] += seg_logit[:, -5] @@ -113,14 +97,18 @@ def predict_by_feat(self, seg_logits: Tensor, seg_logit[:, 10:11] += seg_logit[:, -3] seg_logit[:, 11:13] += seg_logit[:, -2] seg_logit[:, 13:19] += seg_logit[:, -1] - elif seg_logit.size(1) == 12: + + elif seg_logit.size(1) == 12: # For Pascal_person dataset, 7 + 5 + hiera_num_classes = 5 seg_logit[:, 0:1] = seg_logit[:, 0:1] + \ seg_logit[:, 7] + seg_logit[:, 10] seg_logit[:, 1:5] = seg_logit[:, 1:5] + \ seg_logit[:, 8] + seg_logit[:, 11] seg_logit[:, 5:7] = seg_logit[:, 5:7] + \ seg_logit[:, 9] + seg_logit[:, 11] - elif seg_logit.size(1) == 25: + + elif seg_logit.size(1) == 25: # For LIP dataset, 20 + 5 + hiera_num_classes = 5 seg_logit[:, 0:1] = seg_logit[:, 0:1] + \ seg_logit[:, 20] + seg_logit[:, 23] seg_logit[:, 1:8] = seg_logit[:, 1:8] + \ @@ -136,8 +124,10 @@ def predict_by_feat(self, seg_logits: Tensor, seg_logit[:, 16:20] = seg_logit[:, 16:20] + \ seg_logit[:, 22] + seg_logit[:, 24] - # seg_logit = seg_logit[:,:-self.test_cfg['hiera_num_classes']] - seg_logit = seg_logit[:, :-7] + # elif seg_logit.size(1) == 144 # For Mapillary dataset, 124+16+4 + # unofficial repository not release mapillary until 2023/2/6 + + seg_logit = seg_logit[:, :-hiera_num_classes] seg_logit = resize( input=seg_logit, size=batch_img_metas[0]['img_shape'], @@ -146,10 +136,27 @@ def predict_by_feat(self, seg_logits: Tensor, return seg_logit - def losses(self, results, seg_label): - """Compute segmentation loss.""" - seg_logit_before = results[0] - embedding = results[1] + def loss_by_feat( + self, + seg_logits: Tuple[Tensor], # (out, embedding) + batch_data_samples: SampleList) -> dict: + """Compute segmentation loss. Will fix in future. 
+ + Args: + seg_logits (Tuple[Tensor]): The output from decode head + forward function. + For this decode_head output are (out, embedding): tuple + batch_data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logit_before = seg_logits[0] + embedding = seg_logits[1] + seg_label = self._stack_batch_gt(batch_data_samples) + loss = dict() seg_logit = resize( input=seg_logit_before, @@ -166,6 +173,7 @@ def losses(self, results, seg_label): scale_factor=0.5, mode='bilinear', align_corners=self.align_corners) + loss['loss_seg'] = self.loss_decode( self.step, embedding, diff --git a/projects/HieraSeg/losses/__init__.py b/projects/hssn/losses/__init__.py similarity index 100% rename from projects/HieraSeg/losses/__init__.py rename to projects/hssn/losses/__init__.py diff --git a/projects/HieraSeg/losses/hiera_triplet_loss_cityscape.py b/projects/hssn/losses/hiera_triplet_loss_cityscape.py similarity index 100% rename from projects/HieraSeg/losses/hiera_triplet_loss_cityscape.py rename to projects/hssn/losses/hiera_triplet_loss_cityscape.py diff --git a/projects/HieraSeg/losses/tree_triplet_loss.py b/projects/hssn/losses/tree_triplet_loss.py similarity index 100% rename from projects/HieraSeg/losses/tree_triplet_loss.py rename to projects/hssn/losses/tree_triplet_loss.py diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt index 11a6d5a57f..df073e0c1b 100644 --- a/requirements/mminstall.txt +++ b/requirements/mminstall.txt @@ -1,4 +1,2 @@ -mmcls>=1.0.0rc0 mmcv>=2.0.0rc4 --e git+https://github.com/open-mmlab/mmdetection.git@dev-3.x#egg=mmdet -mmengine>=0.2.0,<1.0.0 +mmengine>=0.5.0,<1.0.0 diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt index 1b5d8443b4..9627504884 100644 --- a/requirements/readthedocs.txt +++ b/requirements/readthedocs.txt @@ -1,5 +1,5 @@ mmcv>=2.0.0rc1,<2.1.0 
-mmengine>=0.1.0,<1.0.0 +mmengine>=0.4.0,<1.0.0 prettytable scipy torch diff --git a/tests/data/pseudo_refuge_dataset/ann_dir/pseudo_g0001.png b/tests/data/pseudo_refuge_dataset/ann_dir/pseudo_g0001.png new file mode 100644 index 0000000000..4e69365a9c Binary files /dev/null and b/tests/data/pseudo_refuge_dataset/ann_dir/pseudo_g0001.png differ diff --git a/tests/data/pseudo_refuge_dataset/img_dir/pseudo_g0001.png b/tests/data/pseudo_refuge_dataset/img_dir/pseudo_g0001.png new file mode 100644 index 0000000000..e424c3cd21 Binary files /dev/null and b/tests/data/pseudo_refuge_dataset/img_dir/pseudo_g0001.png differ diff --git a/tests/test_apis/test_inferencer.py b/tests/test_apis/test_inferencer.py new file mode 100644 index 0000000000..497eae4a01 --- /dev/null +++ b/tests/test_apis/test_inferencer.py @@ -0,0 +1,113 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import tempfile + +import numpy as np +import torch +import torch.nn as nn +from mmengine import ConfigDict +from torch.utils.data import DataLoader, Dataset + +from mmseg.apis import MMSegInferencer +from mmseg.models import EncoderDecoder +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.registry import MODELS +from mmseg.utils import register_all_modules + + +@MODELS.register_module(name='InferExampleHead') +class ExampleDecodeHead(BaseDecodeHead): + + def __init__(self, num_classes=19, out_channels=None): + super().__init__( + 3, 3, num_classes=num_classes, out_channels=out_channels) + + def forward(self, inputs): + return self.cls_seg(inputs[0]) + + +@MODELS.register_module(name='InferExampleBackbone') +class ExampleBackbone(nn.Module): + + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(3, 3, 3) + + def init_weights(self, pretrained=None): + pass + + def forward(self, x): + return [self.conv(x)] + + +@MODELS.register_module(name='InferExampleModel') +class ExampleModel(EncoderDecoder): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + 
+class ExampleDataset(Dataset): + + def __init__(self) -> None: + super().__init__() + self.pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') + ] + + def __getitem__(self, idx): + return dict(img=torch.tensor([1]), img_metas=dict()) + + def __len__(self): + return 1 + + +def test_inferencer(): + register_all_modules() + test_dataset = ExampleDataset() + data_loader = DataLoader( + test_dataset, + batch_size=1, + sampler=None, + num_workers=0, + shuffle=False, + ) + + visualizer = dict( + type='SegLocalVisualizer', + vis_backends=[dict(type='LocalVisBackend')], + name='visualizer') + + cfg_dict = dict( + model=dict( + type='InferExampleModel', + data_preprocessor=dict(type='SegDataPreProcessor'), + backbone=dict(type='InferExampleBackbone'), + decode_head=dict(type='InferExampleHead'), + test_cfg=dict(mode='whole')), + visualizer=visualizer, + test_dataloader=data_loader) + cfg = ConfigDict(cfg_dict) + model = MODELS.build(cfg.model) + + ckpt = model.state_dict() + ckpt_filename = tempfile.mktemp() + torch.save(ckpt, ckpt_filename) + + # test initialization + infer = MMSegInferencer(cfg, ckpt_filename) + + # test forward + img = np.random.randint(0, 256, (4, 4, 3)) + infer(img) + + imgs = [img, img] + infer(imgs) + results = infer(imgs, out_dir=tempfile.gettempdir()) + + # test results + assert 'predictions' in results + assert 'visualization' in results + assert len(results['predictions']) == 2 + assert results['predictions'][0].shape == (4, 4) diff --git a/tests/test_config.py b/tests/test_config.py index bd664ed74f..13de460181 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,10 +6,10 @@ import numpy as np from mmengine import Config from mmengine.dataset import Compose +from mmengine.registry import init_default_scope from torch import nn from mmseg.models import build_segmentor -from mmseg.utils import register_all_modules def _get_config_directory(): @@ -70,7 +70,7 @@ def 
test_config_data_pipeline(): xdoctest -m tests/test_config.py test_config_build_data_pipeline """ - register_all_modules() + init_default_scope('mmseg') config_dpath = _get_config_directory() print(f'Found config_dpath = {config_dpath!r}') diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 7c37204a6c..b97cbae3a4 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -8,7 +8,8 @@ from mmseg.datasets import (ADE20KDataset, BaseSegDataset, CityscapesDataset, COCOStuffDataset, DecathlonDataset, ISPRSDataset, LIPDataset, LoveDADataset, PascalVOCDataset, - PotsdamDataset, SynapseDataset, iSAIDDataset) + PotsdamDataset, REFUGEDataset, SynapseDataset, + iSAIDDataset) from mmseg.registry import DATASETS from mmseg.utils import get_classes, get_palette @@ -232,6 +233,19 @@ def test_synapse(): assert len(test_dataset) == 2 +def test_refuge(): + test_dataset = REFUGEDataset( + pipeline=[], + data_prefix=dict( + img_path=osp.join( + osp.dirname(__file__), + '../data/pseudo_refuge_dataset/img_dir'), + seg_map_path=osp.join( + osp.dirname(__file__), + '../data/pseudo_refuge_dataset/ann_dir'))) + assert len(test_dataset) == 1 + + def test_isaid(): test_dataset = iSAIDDataset( pipeline=[], diff --git a/tests/test_datasets/test_dataset_builder.py b/tests/test_datasets/test_dataset_builder.py index 099c5b1df0..b67b1e7aaf 100644 --- a/tests/test_datasets/test_dataset_builder.py +++ b/tests/test_datasets/test_dataset_builder.py @@ -2,12 +2,12 @@ import os.path as osp from mmengine.dataset import ConcatDataset, RepeatDataset +from mmengine.registry import init_default_scope from mmseg.datasets import MultiImageMixDataset from mmseg.registry import DATASETS -from mmseg.utils import register_all_modules -register_all_modules() +init_default_scope('mmseg') @DATASETS.register_module() diff --git a/tests/test_datasets/test_loading.py b/tests/test_datasets/test_loading.py index 100eb042e2..5ce624bff6 100644 --- 
a/tests/test_datasets/test_loading.py +++ b/tests/test_datasets/test_loading.py @@ -57,9 +57,9 @@ def test_load_seg(self): results = transform(copy.deepcopy(results)) assert results['gt_seg_map'].shape == (288, 512) assert results['gt_seg_map'].dtype == np.uint8 - # assert repr(transform) == transform.__class__.__name__ + \ - # "(reduce_zero_label=True, imdecode_backend='pillow', " + \ - # "backend_args={'backend': 'local'})" + assert repr(transform) == transform.__class__.__name__ + \ + "(reduce_zero_label=True, imdecode_backend='pillow', " + \ + 'backend_args=None)' # reduce_zero_label transform = LoadAnnotations(reduce_zero_label=True) @@ -241,7 +241,7 @@ def test_load_biomedical_img(self): "decode_backend='nifti', " 'to_xyz=False, ' 'to_float32=True, ' - "backend_args={'backend': 'local'})") + 'backend_args=None)') def test_load_biomedical_annotation(self): results = dict( @@ -265,7 +265,7 @@ def test_load_biomedical_data(self): 'with_seg=True, ' "decode_backend='numpy', " 'to_xyz=False, ' - "backend_args={'backend': 'local'})") + 'backend_args=None)') transform = LoadBiomedicalData(with_seg=False) results = transform(copy.deepcopy(input_results)) @@ -275,4 +275,4 @@ def test_load_biomedical_data(self): 'with_seg=False, ' "decode_backend='numpy', " 'to_xyz=False, ' - "backend_args={'backend': 'local'})") + 'backend_args=None)') diff --git a/tests/test_datasets/test_transform.py b/tests/test_datasets/test_transform.py index 906b3c27e8..a9136bebc8 100644 --- a/tests/test_datasets/test_transform.py +++ b/tests/test_datasets/test_transform.py @@ -5,6 +5,7 @@ import mmcv import numpy as np import pytest +from mmengine.registry import init_default_scope from PIL import Image from mmseg.datasets.transforms import * # noqa @@ -12,9 +13,8 @@ LoadBiomedicalImageFromFile, PhotoMetricDistortion, RandomCrop) from mmseg.registry import TRANSFORMS -from mmseg.utils import register_all_modules -register_all_modules() +init_default_scope('mmseg') def test_resize(): diff --git 
a/tests/test_engine/test_layer_decay_optimizer_constructor.py b/tests/test_engine/test_layer_decay_optimizer_constructor.py index 72dc6c5123..e7d13db1d6 100644 --- a/tests/test_engine/test_layer_decay_optimizer_constructor.py +++ b/tests/test_engine/test_layer_decay_optimizer_constructor.py @@ -5,12 +5,12 @@ import torch.nn as nn from mmcv.cnn import ConvModule from mmengine.optim.optimizer import build_optim_wrapper +from mmengine.registry import init_default_scope from mmseg.engine.optimizers.layer_decay_optimizer_constructor import \ LearningRateDecayOptimizerConstructor -from mmseg.utils import register_all_modules -register_all_modules() +init_default_scope('mmseg') base_lr = 1 decay_rate = 2 diff --git a/tests/test_models/test_backbones/test_unet.py b/tests/test_models/test_backbones/test_unet.py index d0eaccd393..4d3faf68cc 100644 --- a/tests/test_models/test_backbones/test_unet.py +++ b/tests/test_models/test_backbones/test_unet.py @@ -2,14 +2,14 @@ import pytest import torch from mmcv.cnn import ConvModule +from mmengine.registry import init_default_scope from mmseg.models.backbones.unet import (BasicConvBlock, DeconvModule, InterpConv, UNet, UpConvBlock) from mmseg.models.utils import Upsample -from mmseg.utils import register_all_modules from .utils import check_norm_state -register_all_modules() +init_default_scope('mmseg') def test_unet_basic_conv_block(): diff --git a/tests/test_models/test_forward.py b/tests/test_models/test_forward.py index ab88e4393a..7f72efae2a 100644 --- a/tests/test_models/test_forward.py +++ b/tests/test_models/test_forward.py @@ -9,14 +9,14 @@ import torch import torch.nn as nn from mmengine.model.utils import revert_sync_batchnorm +from mmengine.registry import init_default_scope from mmengine.structures import PixelData from mmengine.utils import is_list_of, is_tuple_of from torch import Tensor from mmseg.structures import SegDataSample -from mmseg.utils import register_all_modules -register_all_modules() 
+init_default_scope('mmseg') def _demo_mm_inputs(batch_size=2, image_shapes=(3, 32, 32), num_classes=5): diff --git a/tests/test_models/test_heads/test_maskformer_head.py b/tests/test_models/test_heads/test_maskformer_head.py index fe4bf96fea..6a47239b03 100644 --- a/tests/test_models/test_heads/test_maskformer_head.py +++ b/tests/test_models/test_heads/test_maskformer_head.py @@ -3,15 +3,15 @@ import torch from mmengine import Config +from mmengine.registry import init_default_scope from mmengine.structures import PixelData from mmseg.registry import MODELS from mmseg.structures import SegDataSample -from mmseg.utils import register_all_modules def test_maskformer_head(): - register_all_modules() + init_default_scope('mmseg') repo_dpath = dirname(dirname(__file__)) cfg = Config.fromfile( join( diff --git a/tests/test_models/test_segmentors/test_seg_tta_model.py b/tests/test_models/test_segmentors/test_seg_tta_model.py index c0e76b22f4..3c9699e8df 100644 --- a/tests/test_models/test_segmentors/test_seg_tta_model.py +++ b/tests/test_models/test_segmentors/test_seg_tta_model.py @@ -2,14 +2,14 @@ import torch from mmengine import ConfigDict from mmengine.model import BaseTTAModel +from mmengine.registry import init_default_scope from mmengine.structures import PixelData from mmseg.registry import MODELS from mmseg.structures import SegDataSample -from mmseg.utils import register_all_modules from .utils import * # noqa: F401,F403 -register_all_modules() +init_default_scope('mmseg') def test_encoder_decoder_tta(): diff --git a/tools/analysis_tools/benchmark.py b/tools/analysis_tools/benchmark.py index bcb3948a6e..afaeabac85 100644 --- a/tools/analysis_tools/benchmark.py +++ b/tools/analysis_tools/benchmark.py @@ -8,11 +8,11 @@ from mmengine import Config from mmengine.fileio import dump from mmengine.model.utils import revert_sync_batchnorm +from mmengine.registry import init_default_scope from mmengine.runner import Runner, load_checkpoint from mmengine.utils import 
mkdir_or_exist from mmseg.registry import MODELS -from mmseg.utils import register_all_modules def parse_args(): @@ -32,8 +32,10 @@ def parse_args(): def main(): args = parse_args() - register_all_modules() cfg = Config.fromfile(args.config) + + init_default_scope(cfg.get('default_scope', 'mmseg')) + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) if args.work_dir is not None: mkdir_or_exist(osp.abspath(args.work_dir)) diff --git a/tools/analysis_tools/browse_dataset.py b/tools/analysis_tools/browse_dataset.py new file mode 100644 index 0000000000..925c14a8ab --- /dev/null +++ b/tools/analysis_tools/browse_dataset.py @@ -0,0 +1,77 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +from mmengine.config import Config, DictAction +from mmengine.utils import ProgressBar + +from mmseg.registry import DATASETS, VISUALIZERS +from mmseg.utils import register_all_modules + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--output-dir', + default=None, + type=str, + help='If there is no display interface, you can save it') + parser.add_argument('--not-show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=float, + default=2, + help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # register all modules in mmseg into the registries + register_all_modules() + + dataset = DATASETS.build(cfg.train_dataloader.dataset) + visualizer = VISUALIZERS.build(cfg.visualizer) + visualizer.dataset_meta = dataset.metainfo + + progress_bar = ProgressBar(len(dataset)) + for item in dataset: + img = item['inputs'].permute(1, 2, 0).numpy() + img = img[..., [2, 1, 0]] # bgr to rgb + data_sample = item['data_samples'].numpy() + img_path = osp.basename(item['data_samples'].img_path) + + out_file = osp.join( + args.output_dir, + osp.basename(img_path)) if args.output_dir is not None else None + + visualizer.add_datasample( + name=osp.basename(img_path), + image=img, + data_sample=data_sample, + draw_gt=True, + draw_pred=False, + wait_time=args.show_interval, + out_file=out_file, + show=not args.not_show) + progress_bar.update() + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/refuge.py b/tools/dataset_converters/refuge.py new file mode 100644 index 0000000000..1186866ab3 --- /dev/null +++ b/tools/dataset_converters/refuge.py @@ -0,0 +1,110 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert REFUGE dataset to mmsegmentation format') + parser.add_argument('--raw_data_root', help='the root path of raw data') + + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def extract_img(root: str, + cur_dir: str, + out_dir: str, + mode: str = 'train', + file_type: str = 'img') -> None: + """Extract a zip archive and save the images found in it as PNG + files under ``out_dir``. + + Args: + root (str): root where the extracted data is saved + cur_dir (str): path of the zip file to extract + out_dir (str): root dir where the data is saved + + mode (str, optional): Defaults to 'train'. + file_type (str, optional): Defaults to 'img', else to 'mask'. + """ + zip_file = zipfile.ZipFile(cur_dir) + zip_file.extractall(root) + for cur_dir, dirs, files in os.walk(root): + # filter child dirs and directories with "Illustration" and "MACOSX" + if len(dirs) == 0 and \ + cur_dir.split('\\')[-1].find('Illustration') == -1 and \ + cur_dir.find('MACOSX') == -1: + + file_names = [ + file for file in files + if file.endswith('.jpg') or file.endswith('.bmp') + ] + for filename in sorted(file_names): + img = mmcv.imread(osp.join(cur_dir, filename)) + + if file_type == 'annotations': + img = img[:, :, 0] + img[np.where(img == 0)] = 1 + img[np.where(img == 128)] = 2 + img[np.where(img == 255)] = 0 + mmcv.imwrite( + img, + osp.join(out_dir, file_type, mode, + osp.splitext(filename)[0] + '.png')) + + +def main(): + args = parse_args() + + raw_data_root = args.raw_data_root + if args.out_dir is None: + out_dir = osp.join('./data', 'REFUGE') + + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + 
mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'images', 'test')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'test')) + + print('Generating images and annotations...') + # process data from the child dir on the first rank + cur_dir, dirs, files = list(os.walk(raw_data_root))[0] + print('====================') + + files = list(filter(lambda x: x.endswith('.zip'), files)) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for file in files: + # search data folders for training,validation,test + mode = list( + filter(lambda x: file.lower().find(x) != -1, + ['training', 'test', 'validation']))[0] + file_root = osp.join(tmp_dir, file[:-4]) + file_type = 'images' if file.find('Anno') == -1 and file.find( + 'GT') == -1 else 'annotations' + extract_img(file_root, osp.join(cur_dir, file), out_dir, mode, + file_type) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py index b2852c21ab..7863eb74f2 100644 --- a/tools/misc/browse_dataset.py +++ b/tools/misc/browse_dataset.py @@ -3,10 +3,10 @@ import os.path as osp from mmengine import Config, DictAction +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from mmseg.registry import DATASETS, VISUALIZERS -from mmseg.utils import register_all_modules def parse_args(): @@ -44,7 +44,7 @@ def main(): cfg.merge_from_dict(args.cfg_options) # register all modules in mmseg into the registries - register_all_modules() + init_default_scope('mmseg') dataset = DATASETS.build(cfg.train_dataloader.dataset) cfg.visualizer['save_dir'] = args.output_dir diff --git a/tools/test.py b/tools/test.py index b21b990f26..7bfde58206 
100644 --- a/tools/test.py +++ b/tools/test.py @@ -6,8 +6,6 @@ from mmengine.config import Config, DictAction from mmengine.runner import Runner -from mmseg.utils import register_all_modules - # TODO: support fuse_conv_bn, visualization, and format_only def parse_args(): @@ -77,10 +75,6 @@ def trigger_visualization_hook(cfg, args): def main(): args = parse_args() - # register all modules in mmseg into the registries - # do not init the default scope here because it will be init in the runner - register_all_modules(init_default_scope=False) - # load config cfg = Config.fromfile(args.config) cfg.launcher = args.launcher diff --git a/tools/train.py b/tools/train.py index 172815a9fe..1721306664 100644 --- a/tools/train.py +++ b/tools/train.py @@ -6,10 +6,9 @@ from mmengine.config import Config, DictAction from mmengine.logging import print_log -from mmengine.registry import RUNNERS from mmengine.runner import Runner -from mmseg.utils import register_all_modules +from mmseg.registry import RUNNERS def parse_args(): @@ -52,10 +51,6 @@ def parse_args(): def main(): args = parse_args() - # register all modules in mmseg into the registries - # do not init the default scope here because it will be init in the runner - register_all_modules(init_default_scope=False) - # load config cfg = Config.fromfile(args.config) cfg.launcher = args.launcher