
Commit b635151

[Reproduce] Update ConvNeXt config files. (#1256)
* Update ConvNeXt training configs.
* Update ConvNeXt network.
* Update metafile and README.
* Update README
Parent: 0e41636

19 files changed: +225 −95 lines

configs/_base_/models/convnext/convnext-base.py

Lines changed: 8 additions & 2 deletions
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=1024,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)
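The same change is applied to all five ConvNeXt `_base_` model files in this commit; only `in_channels` differs per variant. As a rough sketch of what the updated model config looks like after the patch (the enclosing `model = dict(...)` and backbone fields are assumed from the unshown part of the file):

```python
# Sketch of the updated ConvNeXt _base_ model config after this commit.
# The backbone settings are omitted here; only the changed head and
# training augmentations are shown.
model = dict(
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        in_channels=1024,  # 768 / 1024 / 1536 / 2048 depending on the variant
        # CrossEntropyLoss is replaced by label smoothing for training.
        loss=dict(
            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
    ),
    # Batch augmentations applied during training: Mixup and CutMix.
    train_cfg=dict(augments=[
        dict(type='Mixup', alpha=0.8),
        dict(type='CutMix', alpha=1.0),
    ]),
)
```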

configs/_base_/models/convnext/convnext-large.py

Lines changed: 8 additions & 2 deletions
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=1536,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)

configs/_base_/models/convnext/convnext-small.py

Lines changed: 8 additions & 2 deletions
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=768,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)

configs/_base_/models/convnext/convnext-tiny.py

Lines changed: 8 additions & 2 deletions
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=768,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)

configs/_base_/models/convnext/convnext-xlarge.py

Lines changed: 8 additions & 2 deletions
@@ -19,5 +19,11 @@
         type='LinearClsHead',
         num_classes=1000,
         in_channels=2048,
-        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
-    ))
+        loss=dict(
+            type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
+    ),
+    train_cfg=dict(augments=[
+        dict(type='Mixup', alpha=0.8),
+        dict(type='CutMix', alpha=1.0),
+    ]),
+)

configs/_base_/schedules/imagenet_bs1024_adamw_swin.py

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
     paramwise_cfg=dict(
         norm_decay_mult=0.0,
         bias_decay_mult=0.0,
+        flat_decay_mult=0.0,
         custom_keys={
            '.absolute_pos_embed': dict(decay_mult=0.0),
            '.relative_position_bias_table': dict(decay_mult=0.0)
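For context, a minimal sketch of the resulting optimizer wrapper (the AdamW hyper-parameters are assumed, since the hunk only shows `paramwise_cfg`); `flat_decay_mult` presumably disables weight decay for one-dimensional (flat) parameters, mirroring the existing norm and bias multipliers:

```python
# Sketch of the schedule's optimizer wrapper after this commit.
# The AdamW values below are assumptions, not part of the shown hunk.
optim_wrapper = dict(
    optimizer=dict(type='AdamW', lr=1e-3, weight_decay=0.05),  # assumed values
    paramwise_cfg=dict(
        norm_decay_mult=0.0,   # no weight decay on normalization layers
        bias_decay_mult=0.0,   # no weight decay on bias terms
        flat_decay_mult=0.0,   # presumably: no weight decay on 1-D parameters
        custom_keys={
            '.absolute_pos_embed': dict(decay_mult=0.0),
            '.relative_position_bias_table': dict(decay_mult=0.0)
        }),
)
```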

configs/convnext/README.md

Lines changed: 29 additions & 27 deletions
@@ -36,9 +36,9 @@ The "Roaring 20s" of visual recognition began with the introduction of Vision Tr
 
 ```python
 >>> import torch
->>> from mmcls.apis import init_model, inference_model
+>>> from mmcls.apis import get_model, inference_model
 >>>
->>> model = init_model('configs/convnext/convnext-tiny_32xb128_in1k.py', 'https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth')
+>>> model = get_model('convnext-tiny_32xb128_in1k', pretrained=True)
 >>> predict = inference_model(model, 'demo/demo.JPEG')
 >>> print(predict['pred_class'])
 sea snake
@@ -50,10 +50,10 @@ sea snake
 
 ```python
 >>> import torch
->>> from mmcls.apis import init_model
+>>> from mmcls.apis import get_model
 >>>
->>> model = init_model('configs/convnext/convnext-tiny_32xb128_in1k.py', 'https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth')
->>> inputs = torch.rand(1, 3, 224, 224).to(model.data_preprocessor.device)
+>>> model = get_model('convnext-tiny_32xb128_in1k', pretrained=True)
+>>> inputs = torch.rand(1, 3, 224, 224)
 >>> # To get classification scores.
 >>> out = model(inputs)
 >>> print(out.shape)
@@ -85,35 +85,37 @@ For more configurable parameters, please refer to the [API](https://mmclassifica
 
 ## Results and models
 
-### ImageNet-1k
-
-| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
-| :-----------: | :----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------: |
-| ConvNeXt-T\* | From scratch | 28.59 | 4.46 | 82.05 | 95.86 | [config](./convnext-tiny_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128_in1k_20220124-18abde00.pth) |
-| ConvNeXt-S\* | From scratch | 50.22 | 8.69 | 83.13 | 96.44 | [config](./convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128_in1k_20220124-d39b5192.pth) |
-| ConvNeXt-B\* | From scratch | 88.59 | 15.36 | 83.85 | 96.74 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
-| ConvNeXt-B\* | ImageNet-21k | 88.59 | 15.36 | 85.81 | 97.86 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
-| ConvNeXt-L\* | From scratch | 197.77 | 34.37 | 84.30 | 96.89 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
-| ConvNeXt-L\* | ImageNet-21k | 197.77 | 34.37 | 86.61 | 98.04 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
-| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | 86.97 | 98.20 | [config](./convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
-
-*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
-
 ### Pre-trained Models
 
 The pre-trained models on ImageNet-1k or ImageNet-21k are used to fine-tune on the downstream tasks.
 
-| Model | Training Data | Params(M) | Flops(G) | Download |
-| :-----------: | :-----------: | :-------: | :------: | :-----------------------------------------------------------------------------------------------------------------------------------: |
-| ConvNeXt-T\* | ImageNet-1k | 28.59 | 4.46 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128-noema_in1k_20220222-2908964a.pth) |
-| ConvNeXt-S\* | ImageNet-1k | 50.22 | 8.69 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128-noema_in1k_20220222-fa001ca5.pth) |
-| ConvNeXt-B\* | ImageNet-1k | 88.59 | 15.36 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth) |
-| ConvNeXt-B\* | ImageNet-21k | 88.59 | 15.36 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth) |
-| ConvNeXt-L\* | ImageNet-21k | 197.77 | 34.37 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth) |
-| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth) |
+| Model | Training Data | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Download |
+| :------------------------------------------------- | :-----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------------------------------------: |
+| ConvNeXt-T (`convnext-tiny_32xb128-noema_in1k`) | ImageNet-1k | 28.59 | 4.46 | 81.95 | 95.89 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128-noema_in1k_20221208-5d4509c7.pth) |
+| ConvNeXt-S (`convnext-small_32xb128-noema_in1k`) | ImageNet-1k | 50.22 | 8.69 | 83.21 | 96.48 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128-noema_in1k_20221208-4a618995.pth) |
+| ConvNeXt-B (`convnext-base_32xb128-noema_in1k`) | ImageNet-1k | 88.59 | 15.36 | 83.64 | 96.61 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128-noema_in1k_20221208-f8182678.pth) |
+| ConvNeXt-B (`convnext-base_3rdparty-noema_in1k`)\* | ImageNet-1k | 88.59 | 15.36 | 83.71 | 96.60 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128-noema_in1k_20220222-dba4f95f.pth) |
+| ConvNeXt-B (`convnext-base_3rdparty_in21k`)\* | ImageNet-21k | 88.59 | 15.36 | N/A | N/A | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth) |
+| ConvNeXt-L (`convnext-large_3rdparty_in21k`)\* | ImageNet-21k | 197.77 | 34.37 | N/A | N/A | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth) |
+| ConvNeXt-XL (`convnext-xlarge_3rdparty_in21k`)\* | ImageNet-21k | 350.20 | 60.93 | N/A | N/A | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth) |
 
 *Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt).*
 
+### ImageNet-1k
+
+| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
+| :----------------------------------------------------- | :----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :-------------------------------------------------------: |
+| ConvNeXt-T (`convnext-tiny_32xb128_in1k`) | From scratch | 28.59 | 4.46 | 82.14 | 96.06 | [config](./convnext-tiny_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128_in1k_20221207-998cf3e9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_32xb128_in1k_20221207-998cf3e9.log.json) |
+| ConvNeXt-S (`convnext-small_32xb128_in1k`) | From scratch | 50.22 | 8.69 | 83.16 | 96.56 | [config](./convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128_in1k_20221207-4ab7052c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_32xb128_in1k_20221207-4ab7052c.log.json) |
+| ConvNeXt-B (`convnext-base_32xb128_in1k`) | From scratch | 88.59 | 15.36 | 83.66 | 96.74 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128_in1k_20221207-fbdb5eb9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_32xb128_in1k_20221207-fbdb5eb9.log.json) |
+| ConvNeXt-B (`convnext-base_3rdparty_in1k`)\* | From scratch | 88.59 | 15.36 | 83.85 | 96.74 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
+| ConvNeXt-B (`convnext-base_in21k-pre_3rdparty_in1k`)\* | ImageNet 21k | 88.59 | 15.36 | 85.81 | 97.86 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
+| ConvNeXt-L (`convnext-large_3rdparty_in1k`)\* | From scratch | 197.77 | 34.37 | 84.30 | 96.89 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
+| ConvNeXt-L (`convnext-large_in21k-pre_3rdparty_in1k`)\* | ImageNet 21k | 197.77 | 34.37 | 86.61 | 98.04 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
+| ConvNeXt-XL (`convnext-xlarge_in21k-pre_3rdparty_in1k`)\* | ImageNet 21k | 350.20 | 60.93 | 86.97 | 98.20 | [config](./convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
+
+*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
+
 ## Citation
 
 ```bibtex

configs/convnext/convnext-base_32xb128_in1k.py

Lines changed: 2 additions & 2 deletions
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.
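Taken together with the unchanged lines of this config, the schedule and runtime section reads roughly as follows after this commit (a sketch; the `auto_scale_lr` value is inferred from the `32xb128` naming rather than shown in the hunk):

```python
# Sketch of the schedule/runtime section of the ConvNeXt-B in1k config
# after this commit; the _base_ includes are omitted.
optim_wrapper = dict(
    optimizer=dict(lr=4e-3),
    clip_grad=None,  # gradient clipping is disabled in the reproduced recipe
)

# runtime setting: EMA momentum raised from 4e-5 to 1e-4
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]

# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=4096)  # assumed: 32 GPUs x 128 per GPU
```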
New file — Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+_base_ = [
+    '../_base_/models/convnext/convnext-base.py',
+    '../_base_/datasets/imagenet21k_bs128.py',
+    '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
+    '../_base_/default_runtime.py',
+]
+
+# model setting
+model = dict(head=dict(num_classes=21841))
+
+# dataset setting
+data_preprocessor = dict(num_classes=21841)
+train_dataloader = dict(batch_size=128)
+
+# schedule setting
+optim_wrapper = dict(
+    optimizer=dict(lr=4e-3),
+    clip_grad=dict(max_norm=5.0),
+)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
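The ImageNet-21k checkpoints in the README's pre-trained models table are intended for fine-tuning on downstream tasks. A hypothetical sketch of wiring such a checkpoint into an ImageNet-1k fine-tuning config (the derived file and the `init_cfg`/`Pretrained` pattern are assumptions, not part of this commit; only the checkpoint URL comes from the table above):

```python
# Hypothetical fine-tuning sketch: initialize the ConvNeXt-B backbone from
# the ImageNet-21k checkpoint listed in the README, then fine-tune on
# ImageNet-1k using the existing in1k config as the base.
_base_ = ['./convnext-base_32xb128_in1k.py']

checkpoint = ('https://download.openmmlab.com/mmclassification/v0/convnext/'
              'convnext-base_3rdparty_in21k_20220124-13b83eec.pth')

model = dict(
    backbone=dict(
        # `prefix='backbone'` strips the classifier's key prefix when loading.
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint,
                      prefix='backbone')))
```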

configs/convnext/convnext-large_64xb64_in1k.py

Lines changed: 2 additions & 2 deletions
@@ -11,11 +11,11 @@
 # schedule setting
 optim_wrapper = dict(
     optimizer=dict(lr=4e-3),
-    clip_grad=dict(max_norm=5.0),
+    clip_grad=None,
 )
 
 # runtime setting
-custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
+custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]
 
 # NOTE: `auto_scale_lr` is for automatically scaling LR
 # based on the actual training batch size.
