pytorch · jdsgomes · May 19, 2022 · May 18, 2022 · May 18, 2022 · May 18, 2022
diff --git a/docs/source/models.rst b/docs/source/models.rst
@@ -101,6 +101,8 @@ You can construct a model with random weights by calling its constructor:
     convnext_base = models.convnext_base()
     convnext_large = models.convnext_large()
     swin_t = models.swin_t()
+    swin_s = models.swin_s()
+    swin_b = models.swin_b()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 
@@ -226,7 +228,9 @@ convnext_tiny                     82.520          96.146
 convnext_small                    83.616          96.650
 convnext_base                     84.062          96.870
 convnext_large                    84.414          96.976
-swin_t                            81.358          95.526
+swin_t                            81.474          95.776
+swin_s                            83.196          96.360      
+swin_b                            83.582          96.640
 ================================  =============   =============
 
 
@@ -468,6 +472,8 @@ SwinTransformer
     :template: function.rst
 
     swin_t
+    swin_s
+    swin_b
 
 Quantized Models
 ----------------

diff --git a/docs/source/models/swin_transformer.rst b/docs/source/models/swin_transformer.rst
@@ -23,3 +23,5 @@ more details about this class.
     :template: function.rst
 
     swin_t
+    swin_s
+    swin_b
diff --git a/references/classification/README.md b/references/classification/README.md
@@ -228,14 +228,14 @@ and `--batch_size 64`.
 ### SwinTransformer
 ```
 torchrun --nproc_per_node=8 train.py\ 
---model swin_t  --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0\
---bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear\
---lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8\
---clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ra
+--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0  --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear  --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4  --val-resize-size 224
 ```
+Here `$MODEL` is one of `swin_t`, `swin_s` or `swin_b`.
 Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value.
 
 
+
+
 ### ShuffleNet V2
 ```
 torchrun --nproc_per_node=8 train.py \

diff --git a/test/expect/ModelTester.test_swin_b_expect.pkl b/test/expect/ModelTester.test_swin_b_expect.pkl
diff --git a/test/expect/ModelTester.test_swin_s_expect.pkl b/test/expect/ModelTester.test_swin_s_expect.pkl
diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py
@@ -18,7 +18,11 @@
 __all__ = [
     "SwinTransformer",
     "Swin_T_Weights",
+    "Swin_S_Weights",
+    "Swin_B_Weights",
     "swin_t",
+    "swin_s",
+    "swin_b",
 ]
 
 
@@ -408,18 +412,60 @@ def _swin_transformer(
 
 class Swin_T_Weights(WeightsEnum):
     IMAGENET1K_V1 = Weights(
-        url="https://download.pytorch.org/models/swin_t-81486767.pth",
+        url="https://download.pytorch.org/models/swin_t-4c37bd06.pth",
         transforms=partial(
-            ImageClassification, crop_size=224, resize_size=238, interpolation=InterpolationMode.BICUBIC
+            ImageClassification, crop_size=224, resize_size=232, interpolation=InterpolationMode.BICUBIC
         ),
         meta={
             **_COMMON_META,
             "num_params": 28288354,
             "min_size": (224, 224),
             "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer",
             "metrics": {
-                "acc@1": 81.358,
-                "acc@5": 95.526,
+                "acc@1": 81.474,
+                "acc@5": 95.776,
+            },
+            "_docs": """These weights reproduce closely the results of the paper using its training recipe.""",
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+class Swin_S_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/swin_s-30134662.pth",
+        transforms=partial(
+            ImageClassification, crop_size=224, resize_size=246, interpolation=InterpolationMode.BICUBIC
+        ),
+        meta={
+            **_COMMON_META,
+            "num_params": 49606258,
+            "min_size": (224, 224),
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer",
+            "metrics": {
+                "acc@1": 83.196,
+                "acc@5": 96.360,
+            },
+            "_docs": """These weights reproduce closely the results of the paper using its training recipe.""",
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
+
+
+class Swin_B_Weights(WeightsEnum):
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/swin_b-1f1feb5c.pth",
+        transforms=partial(
+            ImageClassification, crop_size=224, resize_size=238, interpolation=InterpolationMode.BICUBIC
+        ),
+        meta={
+            **_COMMON_META,
+            "num_params": 87768224,
+            "min_size": (224, 224),
+            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer",
+            "metrics": {
+                "acc@1": 83.582,
+                "acc@5": 96.640,
             },
             "_docs": """These weights reproduce closely the results of the paper using its training recipe.""",
         },
@@ -461,3 +507,75 @@ def swin_t(*, weights: Optional[Swin_T_Weights] = None, progress: bool = True, *
         progress=progress,
         **kwargs,
     )
+
+
+def swin_s(*, weights: Optional[Swin_S_Weights] = None, progress: bool = True, **kwargs: Any) -> SwinTransformer:
+    """
+    Constructs a swin_small architecture from
+    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/pdf/2103.14030>`_.
+
+    Args:
+        weights (:class:`~torchvision.models.Swin_S_Weights`, optional): The
+            pretrained weights to use. See
+            :class:`~torchvision.models.Swin_S_Weights` below for
+            more details, and possible values. By default, no pre-trained
+            weights are used.
+        progress (bool, optional): If True, displays a progress bar of the
+            download to stderr. Default is True.
+        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
+            base class. Please refer to the `source code
+            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
+            for more details about this class.
+
+    .. autoclass:: torchvision.models.Swin_S_Weights
+        :members:
+    """
+    weights = Swin_S_Weights.verify(weights)
+
+    return _swin_transformer(
+        patch_size=4,
+        embed_dim=96,
+        depths=[2, 2, 18, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        stochastic_depth_prob=0.3,
+        weights=weights,
+        progress=progress,
+        **kwargs,
+    )
+
+
+def swin_b(*, weights: Optional[Swin_B_Weights] = None, progress: bool = True, **kwargs: Any) -> SwinTransformer:
+    """
+    Constructs a swin_base architecture from
+    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/pdf/2103.14030>`_.
+
+    Args:
+        weights (:class:`~torchvision.models.Swin_B_Weights`, optional): The
+            pretrained weights to use. See
+            :class:`~torchvision.models.Swin_B_Weights` below for
+            more details, and possible values. By default, no pre-trained
+            weights are used.
+        progress (bool, optional): If True, displays a progress bar of the
+            download to stderr. Default is True.
+        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
+            base class. Please refer to the `source code
+            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
+            for more details about this class.
+
+    .. autoclass:: torchvision.models.Swin_B_Weights
+        :members:
+    """
+    weights = Swin_B_Weights.verify(weights)
+
+    return _swin_transformer(
+        patch_size=4,
+        embed_dim=128,
+        depths=[2, 2, 18, 2],
+        num_heads=[4, 8, 16, 32],
+        window_size=7,
+        stochastic_depth_prob=0.5,
+        weights=weights,
+        progress=progress,
+        **kwargs,
+    )
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,3 +23,5 @@ more details about this class. @@
         :template: function.rst
         swin_t
+        swin_s
+        swin_b