pytorch
diff --git a/‎CONTRIBUTING.md
Lines changed: 1 addition & 1 deletion b/‎CONTRIBUTING.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/models.rst
Lines changed: 13 additions & 1 deletion b/‎docs/source/models.rst
Lines changed: 13 additions & 1 deletion
diff --git a/‎hubconf.py
Lines changed: 3 additions & 0 deletions b/‎hubconf.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎references/classification/README.md
Lines changed: 21 additions & 1 deletion b/‎references/classification/README.md
Lines changed: 21 additions & 1 deletion
diff --git a/‎test/builtin_dataset_mocks.py
Lines changed: 28 additions & 0 deletions b/‎test/builtin_dataset_mocks.py
Lines changed: 28 additions & 0 deletions
diff --git a/‎test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl
939 Bytes b/‎test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl
939 Bytes
diff --git a/‎test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl
939 Bytes b/‎test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl
939 Bytes
diff --git a/‎test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl
939 Bytes b/‎test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl
939 Bytes
diff --git a/‎test/test_prototype_transforms.py
Lines changed: 1 addition & 1 deletion b/‎test/test_prototype_transforms.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎test/test_prototype_transforms_functional.py
Lines changed: 0 additions & 2 deletions b/‎test/test_prototype_transforms_functional.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎torchvision/_utils.py
Lines changed: 17 additions & 0 deletions b/‎torchvision/_utils.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎torchvision/csrc/io/decoder/gpu/gpu_decoder.cpp
Lines changed: 1 addition & 2 deletions b/‎torchvision/csrc/io/decoder/gpu/gpu_decoder.cpp
Lines changed: 1 addition & 2 deletions
@@ -83,7 +83,7 @@ Instead of relying directly on `black` however, we rely on
 [ufmt](https://github.com/omnilib/ufmt), for compatibility reasons with Facebook
 internal infrastructure.
 
-To format your code, install `ufmt` with `pip install ufmt` and use e.g.:
+To format your code, install `ufmt` with `pip install ufmt==1.3.2 black==21.9b0 usort==0.6.4` and use e.g.:
 
 ```bash
 ufmt format torchvision
 
@@ -38,7 +38,7 @@ architectures for image classification:
 -  `ResNeXt`_
 -  `Wide ResNet`_
 -  `MNASNet`_
--  `EfficientNet`_
+-  `EfficientNet`_ v1 & v2
 -  `RegNet`_
 -  `VisionTransformer`_
 -  `ConvNeXt`_
@@ -70,6 +70,9 @@ You can construct a model with random weights by calling its constructor:
     efficientnet_b5 = models.efficientnet_b5()
     efficientnet_b6 = models.efficientnet_b6()
     efficientnet_b7 = models.efficientnet_b7()
+    efficientnet_v2_s = models.efficientnet_v2_s()
+    efficientnet_v2_m = models.efficientnet_v2_m()
+    efficientnet_v2_l = models.efficientnet_v2_l()
     regnet_y_400mf = models.regnet_y_400mf()
     regnet_y_800mf = models.regnet_y_800mf()
     regnet_y_1_6gf = models.regnet_y_1_6gf()
@@ -122,6 +125,9 @@ These can be constructed by passing ``pretrained=True``:
     efficientnet_b5 = models.efficientnet_b5(pretrained=True)
     efficientnet_b6 = models.efficientnet_b6(pretrained=True)
     efficientnet_b7 = models.efficientnet_b7(pretrained=True)
+    efficientnet_v2_s = models.efficientnet_v2_s(pretrained=True)
+    efficientnet_v2_m = models.efficientnet_v2_m(pretrained=True)
+    efficientnet_v2_l = models.efficientnet_v2_l(pretrained=True)
     regnet_y_400mf = models.regnet_y_400mf(pretrained=True)
     regnet_y_800mf = models.regnet_y_800mf(pretrained=True)
     regnet_y_1_6gf = models.regnet_y_1_6gf(pretrained=True)
@@ -238,6 +244,9 @@ EfficientNet-B4                   83.384          96.594
 EfficientNet-B5                   83.444          96.628
 EfficientNet-B6                   84.008          96.916
 EfficientNet-B7                   84.122          96.908
+EfficientNetV2-s                  84.228          96.878
+EfficientNetV2-m                  85.112          97.156
+EfficientNetV2-l                  85.810          97.792
 regnet_x_400mf                    72.834          90.950
 regnet_x_800mf                    75.212          92.348
 regnet_x_1_6gf                    77.040          93.440
@@ -439,6 +448,9 @@ EfficientNet
     efficientnet_b5
     efficientnet_b6
     efficientnet_b7
+    efficientnet_v2_s
+    efficientnet_v2_m
+    efficientnet_v2_l
 
 RegNet
 ------------
 
@@ -13,6 +13,9 @@
     efficientnet_b5,
     efficientnet_b6,
     efficientnet_b7,
+    efficientnet_v2_s,
+    efficientnet_v2_m,
+    efficientnet_v2_l,
 )
 from torchvision.models.googlenet import googlenet
 from torchvision.models.inception import inception_v3
 
@@ -88,7 +88,7 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
 
 
-### EfficientNet
+### EfficientNet-V1
 
 The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
 
@@ -114,6 +114,26 @@ torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bic
       --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained
 ```
 
+
+### EfficientNet-V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \
+--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
+--ra-sampler --ra-reps 4
+```
+Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`. 
+Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium variant used `384` and `480` respectively.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs),
+and `--batch-size 32`.
+
+The weights of the Large variant are ported from the original paper rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms.
+
+
 ### RegNet
 
 #### Small models
 
@@ -878,6 +878,34 @@ def celeba(info, root, config):
     return CelebAMockData.generate(root)[config.split]
 
 
+@register_mock
+def country211(info, root, config):  # builds mock "country211" data under `root` and returns the sample count for config["split"]
+    split_name_mapper = {  # maps the split name found in `config` to the folder name used on disk
+        "train": "train",
+        "val": "valid",  # the only split whose folder name differs from its config name
+        "test": "test",
+    }
+    split_folder = pathlib.Path(root, "country211", split_name_mapper[config["split"]])
+    split_folder.mkdir(parents=True, exist_ok=True)
+
+    num_examples = {  # number of images requested per class; differs per split
+        "train": 3,
+        "val": 4,
+        "test": 5,
+    }[config["split"]]
+
+    classes = ("AD", "BS", "GR")  # create_image_folder is called once per entry
+    for cls in classes:
+        create_image_folder(
+            split_folder,
+            name=cls,
+            file_name_fn=lambda idx: f"{idx}.jpg",
+            num_examples=num_examples,
+        )
+    make_tar(root, f"{split_folder.parent.name}.tgz", split_folder.parent, compression="gz")  # archive name resolves to "country211.tgz"; presumably written into `root` - confirm against make_tar
+    return num_examples * len(classes)  # total images across all classes for this split
+
+
 @register_mock
 def dtd(info, root, config):
     data_folder = root / "dtd"
 
@@ -126,7 +126,7 @@ def test_auto_augment(self, transform, input):
             (
                 transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),
                 itertools.chain.from_iterable(
-                    fn(color_spaces=["rgb"], dtypes=[torch.float32])
+                    fn(color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32])
                     for fn in [
                         make_images,
                         make_vanilla_tensor_images,
 
@@ -14,8 +14,6 @@
 def make_image(size=None, *, color_space, extra_dims=(), dtype=torch.float32):
     size = size or torch.randint(16, 33, (2,)).tolist()
 
-    if isinstance(color_space, str):
-        color_space = features.ColorSpace[color_space]
     num_channels = {
         features.ColorSpace.GRAYSCALE: 1,
         features.ColorSpace.RGB: 3,
 
@@ -0,0 +1,17 @@
+import enum
+
+
+class StrEnumMeta(enum.EnumMeta):  # EnumMeta subclass that adds a by-name lookup helper to enum classes
+    auto = enum.auto  # alias of enum.auto stored on the metaclass, so it is reachable as an attribute of classes built with it
+
+    def from_str(self, member: str):  # `self` is the enum class; returns the member named `member`, raises ValueError if there is none
+        try:
+            return self[member]  # EnumMeta.__getitem__ looks members up by their exact name
+        except KeyError:
+            # TODO: use `add_suggestion` from torchvision.prototype.utils._internal to improve the error message as
+            #  soon as it is migrated.
+            raise ValueError(f"Unknown value '{member}' for {self.__name__}.") from None  # `from None` keeps the internal KeyError out of the traceback
+
+
+class StrEnum(enum.Enum, metaclass=StrEnumMeta):  # base Enum; subclasses get `from_str` and `auto` through the StrEnumMeta metaclass
+    pass
@@ -29,8 +29,7 @@ torch::Tensor GPUDecoder::decode() {
   unsigned long videoBytes = 0;
   uint8_t* video = nullptr;
   at::cuda::CUDAGuard device_guard(device);
-  auto options = torch::TensorOptions().dtype(torch::kU8).device(torch::kCUDA);
-  torch::Tensor frame = torch::zeros({0}, options);
+  torch::Tensor frame;
   do {
     demuxer.demux(&video, &videoBytes);
     decoder.decode(video, videoBytes);
Original file line number	Diff line number	Diff line change
`@@ -126,7 +126,7 @@ def test_auto_augment(self, transform, input):`
`126`	`126`	`(`
`127`	`127`	`transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),`
`128`	`128`	`itertools.chain.from_iterable(`
`129`		`- fn(color_spaces=["rgb"], dtypes=[torch.float32])`
	`129`	`+ fn(color_spaces=[features.ColorSpace.RGB], dtypes=[torch.float32])`
`130`	`130`	`for fn in [`
`131`	`131`	`make_images,`
`132`	`132`	`make_vanilla_tensor_images,`