open-mmlab · mzr1996 · Dec 19, 2022 · Nov 30, 2022 · Nov 30, 2022 · Dec 8, 2022
diff --git a/configs/_base_/models/convnext/convnext-base.py b/configs/_base_/models/convnext/convnext-base.py
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=1024,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
diff --git a/configs/_base_/models/convnext/convnext-large.py b/configs/_base_/models/convnext/convnext-large.py
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=1536,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
diff --git a/configs/_base_/models/convnext/convnext-small.py b/configs/_base_/models/convnext/convnext-small.py
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=768,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
diff --git a/configs/_base_/models/convnext/convnext-tiny.py b/configs/_base_/models/convnext/convnext-tiny.py
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=768,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
diff --git a/configs/_base_/models/convnext/convnext-xlarge.py b/configs/_base_/models/convnext/convnext-xlarge.py
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=2048,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
diff --git a/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py b/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py
@@ -10,6 +10,7 @@
     paramwise_cfg=dict(
         norm_decay_mult=0.0,
         bias_decay_mult=0.0,
+        flat_decay_mult=0.0,
         custom_keys={
             '.absolute_pos_embed': dict(decay_mult=0.0),
             '.relative_position_bias_table': dict(decay_mult=0.0)

diff --git a/configs/convnext/README.md b/configs/convnext/README.md
@@ -36,9 +36,9 @@ The "Roaring 20s" of visual recognition began with the introduction of Vision Tr
 
 ```python
 >>> import torch
->>> from mmcls.apis import init_model, inference_model
+>>> from mmcls.apis import get_model, inference_model
 >>>
->>> model = init_model('configs/convnext/convnext-tiny_32xb128_in1k.py', 'https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth')
+>>> model = get_model('convnext-tiny_32xb128_in1k', pretrained=True)
 >>> predict = inference_model(model, 'demo/demo.JPEG')
 >>> print(predict['pred_class'])
 sea snake
@@ -50,10 +50,10 @@ sea snake
 
 ```python
 >>> import torch
->>> from mmcls.apis import init_model
+>>> from mmcls.apis import get_model
 >>>
->>> model = init_model('configs/convnext/convnext-tiny_32xb128_in1k.py', 'https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth')
->>> inputs = torch.rand(1, 3, 224, 224).to(model.data_preprocessor.device)
+>>> model = get_model('convnext-tiny_32xb128_in1k', pretrained=True)
+>>> inputs = torch.rand(1, 3, 224, 224)
 >>> # To get classification scores.
 >>> out = model(inputs)
 >>> print(out.shape)
@@ -85,35 +85,37 @@ For more configurable parameters, please refer to the [API](https://mmclassifica
 
 ## Results and models
 
-### ImageNet-1k
-
-|     Model     |   Pretrain   | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) |                   Config                   |                                              Download                                              |
-| :-----------: | :----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------: |
-| ConvNeXt-T\*  | From scratch |   28.59   |   4.46   |   82.05   |   95.86   | [config](./convnext-tiny_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128_in1k_20220124-18abde00.pth) |
-| ConvNeXt-S\*  | From scratch |   50.22   |   8.69   |   83.13   |   96.44   | [config](./convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128_in1k_20220124-d39b5192.pth) |
-| ConvNeXt-B\*  | From scratch |   88.59   |  15.36   |   83.85   |   96.74   | [config](./convnext-base_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
-| ConvNeXt-B\*  | ImageNet-21k |   88.59   |  15.36   |   85.81   |   97.86   | [config](./convnext-base_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
-| ConvNeXt-L\*  | From scratch |  197.77   |  34.37   |   84.30   |   96.89   | [config](./convnext-large_64xb64_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
-| ConvNeXt-L\*  | ImageNet-21k |  197.77   |  34.37   |   86.61   |   98.04   | [config](./convnext-large_64xb64_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
-| ConvNeXt-XL\* | ImageNet-21k |  350.20   |  60.93   |   86.97   |   98.20   | [config](./convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
-
-*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
-
 ### Pre-trained Models
 
 The pre-trained models on ImageNet-1k or ImageNet-21k are used to fine-tune on the downstream tasks.
 
-|     Model     | Training Data | Params(M) | Flops(G) |                                                               Download                                                                |
-| :-----------: | :-----------: | :-------: | :------: | :-----------------------------------------------------------------------------------------------------------------------------------: |
-| ConvNeXt-T\*  |  ImageNet-1k  |   28.59   |   4.46   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth)  |
-| ConvNeXt-S\*  |  ImageNet-1k  |   50.22   |   8.69   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128-noema_in1k_20220222-fa001ca5.pth) |
-| ConvNeXt-B\*  |  ImageNet-1k  |   88.59   |  15.36   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth)  |
-| ConvNeXt-B\*  | ImageNet-21k  |   88.59   |  15.36   |        [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth)        |
-| ConvNeXt-L\*  | ImageNet-21k  |  197.77   |  34.37   |       [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth)        |
-| ConvNeXt-XL\* | ImageNet-21k  |  350.20   |  60.93   |       [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth)       |
+| Model                                              | Training Data | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) |                                                Download                                                |
+| :------------------------------------------------- | :-----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------------------------------------: |
+| ConvNeXt-T (`convnext-tiny_32xb128-noema_in1k`)    |  ImageNet-1k  |   28.59   |   4.46   |   81.95   |   95.89   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128-noema_in1k_20221208-5d4509c7.pth) |
+| ConvNeXt-S (`convnext-small_32xb128-noema_in1k`)   |  ImageNet-1k  |   50.22   |   8.69   |   83.21   |   96.48   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128-noema_in1k_20221208-4a618995.pth) |
+| ConvNeXt-B (`convnext-base_32xb128-noema_in1k`)    |  ImageNet-1k  |   88.59   |  15.36   |   83.64   |   96.61   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128-noema_in1k_20221208-f8182678.pth) |
+| ConvNeXt-B (`convnext-base_3rdparty-noema_in1k`)\* |  ImageNet-1k  |   88.59   |  15.36   |   83.71   |   96.60   | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth) |
+| ConvNeXt-B (`convnext-base_3rdparty_in21k`)\*      | ImageNet-21k  |   88.59   |  15.36   |    N/A    |    N/A    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth) |
+| ConvNeXt-L (`convnext-large_3rdparty_in21k`)\*     | ImageNet-21k  |  197.77   |  34.37   |    N/A    |    N/A    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth) |
+| ConvNeXt-XL (`convnext-xlarge_3rdparty_in21k`)\*   | ImageNet-21k  |  350.20   |  60.93   |    N/A    |    N/A    | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth) |
 
 *Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt).*
 
+### ImageNet-1k
+
+| Model                                                  |   Pretrain   | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) |                   Config                   |                         Download                          |
+| :----------------------------------------------------- | :----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :-------------------------------------------------------: |
+| ConvNeXt-T (`convnext-tiny_32xb128_in1k`)              | From scratch |   28.59   |   4.46   |   82.14   |   96.06   | [config](./convnext-tiny_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128_in1k_20221207-998cf3e9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128_in1k_20221207-998cf3e9.log.json) |
+| ConvNeXt-S (`convnext-small_32xb128_in1k`)             | From scratch |   50.22   |   8.69   |   83.16   |   96.56   | [config](./convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128_in1k_20221207-4ab7052c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128_in1k_20221207-4ab7052c.log.json) |
+| ConvNeXt-B (`convnext-base_32xb128_in1k`)              | From scratch |   88.59   |  15.36   |   83.66   |   96.74   | [config](./convnext-base_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128_in1k_20221207-fbdb5eb9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128_in1k_20221207-fbdb5eb9.log.json) |
+| ConvNeXt-B (`convnext-base_3rdparty_in1k`)\*           | From scratch |   88.59   |  15.36   |   83.85   |   96.74   | [config](./convnext-base_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
+| ConvNeXt-B (`convnext-base_in21k-pre_3rdparty_in1k`)\* | ImageNet-21k |   88.59   |  15.36   |   85.81   |   97.86   | [config](./convnext-base_32xb128_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
+| ConvNeXt-L (`convnext-large_3rdparty_in1k`)\*          | From scratch |  197.77   |  34.37   |   84.30   |   96.89   | [config](./convnext-large_64xb64_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
+| ConvNeXt-L (`convnext-large_in21k-pre_3rdparty_in1k`)\* | ImageNet-21k |  197.77   |  34.37   |   86.61   |   98.04   | [config](./convnext-large_64xb64_in1k.py)  | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
+| ConvNeXt-XL (`convnext-xlarge_in21k-pre_3rdparty_in1k`)\* | ImageNet-21k |  350.20   |  60.93   |   86.97   |   98.20   | [config](./convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
+
+*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
+
 ## Citation
 
 ```bibtex

diff --git a/configs/convnext/convnext-base_32xb128_in1k.py b/configs/convnext/convnext-base_32xb128_in1k.py
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.

diff --git a/configs/convnext/convnext-base_32xb128_in21k.py b/configs/convnext/convnext-base_32xb128_in21k.py
@@ -0,0 +1,24 @@
+_base_ = [
+    '../_base_/models/convnext/convnext-base.py',
+    '../_base_/datasets/imagenet21k_bs128.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+# model setting
+model = dict(head=dict(num_classes=21841))
+
+# dataset setting
+data_preprocessor = dict(num_classes=21841)
+train_dataloader = dict(batch_size=128)
+
+# schedule setting
+optim_wrapper = dict(
+    optimizer=dict(lr=4e-3),
+    clip_grad=dict(max_norm=5.0),
+)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
diff --git a/configs/convnext/convnext-large_64xb64_in1k.py b/configs/convnext/convnext-large_64xb64_in1k.py
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.

diff --git a/configs/convnext/convnext-large_64xb64_in21k.py b/configs/convnext/convnext-large_64xb64_in21k.py
@@ -0,0 +1,24 @@
+_base_ = [
+    '../_base_/models/convnext/convnext-large.py',
+    '../_base_/datasets/imagenet21k_bs128.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+# model setting
+model = dict(head=dict(num_classes=21841))
+
+# dataset setting
+data_preprocessor = dict(num_classes=21841)
+train_dataloader = dict(batch_size=64)
+
+# schedule setting
+optim_wrapper = dict(
+    optimizer=dict(lr=4e-3),
+    clip_grad=dict(max_norm=5.0),
+)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
diff --git a/configs/convnext/convnext-small_32xb128_in1k.py b/configs/convnext/convnext-small_32xb128_in1k.py
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.

diff --git a/configs/convnext/convnext-tiny_32xb128_in1k.py b/configs/convnext/convnext-tiny_32xb128_in1k.py
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.

diff --git a/configs/convnext/convnext-xlarge_64xb64_in1k.py b/configs/convnext/convnext-xlarge_64xb64_in1k.py
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.

diff --git a/configs/convnext/convnext-xlarge_64xb64_in21k.py b/configs/convnext/convnext-xlarge_64xb64_in21k.py
@@ -0,0 +1,24 @@
+_base_ = [
+    '../_base_/models/convnext/convnext-xlarge.py',
+    '../_base_/datasets/imagenet21k_bs128.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+# model setting
+model = dict(head=dict(num_classes=21841))
+
+# dataset setting
+data_preprocessor = dict(num_classes=21841)
+train_dataloader = dict(batch_size=64)
+
+# schedule setting
+optim_wrapper = dict(
+    optimizer=dict(lr=4e-3),
+    clip_grad=dict(max_norm=5.0),
+)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+# base_batch_size = (64 GPUs) x (64 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)