
Commit e0a4b0e

datumbox authored and facebook-github-bot committed
[fbsync] Adding min_size to classification and video models (#5223)
Summary:
* Adding min_size as a required field.
* Adding min_size to classification models (quantized and not).
* Adding min_size to video models meta.
* Moving min_size to _COMMON_META.
* Fixing extra line.

Reviewed By: jdsgomes, prabhat00155

Differential Revision: D33739383

fbshipit-source-id: 4984d7aebd0e46d8ef9c769b553e8f0faea91654

Co-authored-by: Vasilis Vryniotis <[email protected]>
1 parent d69bc15 commit e0a4b0e

22 files changed: +26 lines, -1 line
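The pattern across the diffs below is the same in every file: each prototype weights definition carries a meta dictionary, and this commit adds a "min_size" entry next to the existing "size". As a stand-alone sketch of that layout, using AlexNet's values from this commit (the dict name and the placeholder values for categories and interpolation are simplifications, not the actual torchvision source):

    # Simplified sketch of the meta dict shape these diffs touch.
    alexnet_meta = {
        "task": "image_classification",
        "architecture": "AlexNet",
        "publication_year": 2012,
        "num_params": 61100840,
        "size": (224, 224),      # resolution the weights were trained/evaluated at
        "min_size": (63, 63),    # new field: minimum (height, width) input recorded for the model
        "categories": ["tench", "goldfish"],  # placeholder; the real code uses _IMAGENET_CATEGORIES
        "interpolation": "bilinear",          # placeholder; the real code uses InterpolationMode.BILINEAR
    }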

test/test_prototype_models.py

Lines changed: 1 addition & 1 deletion

@@ -97,7 +97,7 @@ def test_naming_conventions(model_fn):
 )
 @run_if_test_with_prototype
 def test_schema_meta_validation(model_fn):
-    classification_fields = ["size", "categories", "acc@1", "acc@5"]
+    classification_fields = ["size", "categories", "acc@1", "acc@5", "min_size"]
     defaults = {
         "all": ["task", "architecture", "publication_year", "interpolation", "recipe", "num_params"],
         "models": classification_fields,

torchvision/prototype/models/alexnet.py

Lines changed: 1 addition & 0 deletions

@@ -23,6 +23,7 @@ class AlexNet_Weights(WeightsEnum):
     "publication_year": 2012,
     "num_params": 61100840,
     "size": (224, 224),
+    "min_size": (63, 63),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg",

torchvision/prototype/models/densenet.py

Lines changed: 1 addition & 0 deletions

@@ -68,6 +68,7 @@ def _densenet(
     "architecture": "DenseNet",
     "publication_year": 2016,
     "size": (224, 224),
+    "min_size": (29, 29),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/pull/116",

torchvision/prototype/models/efficientnet.py

Lines changed: 1 addition & 0 deletions

@@ -66,6 +66,7 @@ def _efficientnet(
     "task": "image_classification",
     "architecture": "EfficientNet",
     "publication_year": 2019,
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BICUBIC,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet",

torchvision/prototype/models/googlenet.py

Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@ class GoogLeNet_Weights(WeightsEnum):
     "publication_year": 2014,
     "num_params": 6624904,
     "size": (224, 224),
+    "min_size": (15, 15),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#googlenet",

torchvision/prototype/models/inception.py

Lines changed: 1 addition & 0 deletions

@@ -23,6 +23,7 @@ class Inception_V3_Weights(WeightsEnum):
     "publication_year": 2015,
     "num_params": 27161264,
     "size": (299, 299),
+    "min_size": (75, 75),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#inception-v3",

torchvision/prototype/models/mnasnet.py

Lines changed: 1 addition & 0 deletions

@@ -28,6 +28,7 @@
     "architecture": "MNASNet",
     "publication_year": 2018,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/1e100/mnasnet_trainer",

torchvision/prototype/models/mobilenetv2.py

Lines changed: 1 addition & 0 deletions

@@ -23,6 +23,7 @@ class MobileNet_V2_Weights(WeightsEnum):
     "publication_year": 2018,
     "num_params": 3504872,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#mobilenetv2",

torchvision/prototype/models/mobilenetv3.py

Lines changed: 1 addition & 0 deletions

@@ -42,6 +42,7 @@ def _mobilenet_v3(
     "architecture": "MobileNetV3",
     "publication_year": 2019,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
 }

torchvision/prototype/models/quantization/googlenet.py

Lines changed: 1 addition & 0 deletions

@@ -33,6 +33,7 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
     "publication_year": 2014,
     "num_params": 6624904,
     "size": (224, 224),
+    "min_size": (15, 15),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "fbgemm",

torchvision/prototype/models/quantization/inception.py

Lines changed: 1 addition & 0 deletions

@@ -32,6 +32,7 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
     "publication_year": 2015,
     "num_params": 27161264,
     "size": (299, 299),
+    "min_size": (75, 75),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "fbgemm",

torchvision/prototype/models/quantization/mobilenetv2.py

Lines changed: 1 addition & 0 deletions

@@ -33,6 +33,7 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
     "publication_year": 2018,
     "num_params": 3504872,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "qnnpack",

torchvision/prototype/models/quantization/mobilenetv3.py

Lines changed: 1 addition & 0 deletions

@@ -66,6 +66,7 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
     "publication_year": 2019,
     "num_params": 5483032,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "qnnpack",

torchvision/prototype/models/quantization/resnet.py

Lines changed: 1 addition & 0 deletions

@@ -56,6 +56,7 @@ def _resnet(
 _COMMON_META = {
     "task": "image_classification",
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "fbgemm",

torchvision/prototype/models/quantization/shufflenetv2.py

Lines changed: 1 addition & 0 deletions

@@ -55,6 +55,7 @@ def _shufflenetv2(
     "architecture": "ShuffleNetV2",
     "publication_year": 2018,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "backend": "fbgemm",

torchvision/prototype/models/regnet.py

Lines changed: 1 addition & 0 deletions

@@ -50,6 +50,7 @@
     "architecture": "RegNet",
     "publication_year": 2020,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
 }

torchvision/prototype/models/resnet.py

Lines changed: 1 addition & 0 deletions

@@ -54,6 +54,7 @@ def _resnet(
 _COMMON_META = {
     "task": "image_classification",
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
 }

torchvision/prototype/models/shufflenetv2.py

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ def _shufflenetv2(
     "architecture": "ShuffleNetV2",
     "publication_year": 2018,
     "size": (224, 224),
+    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/barrh/Shufflenet-v2-Pytorch/tree/v0.1.0",

torchvision/prototype/models/squeezenet.py

Lines changed: 2 additions & 0 deletions

@@ -30,6 +30,7 @@ class SqueezeNet1_0_Weights(WeightsEnum):
     transforms=partial(ImageNetEval, crop_size=224),
     meta={
         **_COMMON_META,
+        "min_size": (21, 21),
         "num_params": 1248424,
         "acc@1": 58.092,
         "acc@5": 80.420,
@@ -44,6 +45,7 @@ class SqueezeNet1_1_Weights(WeightsEnum):
     transforms=partial(ImageNetEval, crop_size=224),
     meta={
         **_COMMON_META,
+        "min_size": (17, 17),
         "num_params": 1235496,
         "acc@1": 58.178,
         "acc@5": 80.624,

torchvision/prototype/models/vgg.py

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ def _vgg(cfg: str, batch_norm: bool, weights: Optional[WeightsEnum], progress: b
     "architecture": "VGG",
     "publication_year": 2014,
     "size": (224, 224),
+    "min_size": (32, 32),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#alexnet-and-vgg",

torchvision/prototype/models/video/resnet.py

Lines changed: 1 addition & 0 deletions

@@ -55,6 +55,7 @@ def _video_resnet(
     "task": "video_classification",
     "publication_year": 2017,
     "size": (112, 112),
+    "min_size": (1, 1),
     "categories": _KINETICS400_CATEGORIES,
     "interpolation": InterpolationMode.BILINEAR,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/video_classification",

torchvision/prototype/models/vision_transformer.py

Lines changed: 4 additions & 0 deletions

@@ -45,6 +45,7 @@ class ViT_B_16_Weights(WeightsEnum):
     **_COMMON_META,
     "num_params": 86567656,
     "size": (224, 224),
+    "min_size": (224, 224),
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16",
     "acc@1": 81.072,
     "acc@5": 95.318,
@@ -61,6 +62,7 @@ class ViT_B_32_Weights(WeightsEnum):
     **_COMMON_META,
     "num_params": 88224232,
     "size": (224, 224),
+    "min_size": (224, 224),
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32",
     "acc@1": 75.912,
     "acc@5": 92.466,
@@ -77,6 +79,7 @@ class ViT_L_16_Weights(WeightsEnum):
     **_COMMON_META,
     "num_params": 304326632,
     "size": (224, 224),
+    "min_size": (224, 224),
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16",
     "acc@1": 79.662,
     "acc@5": 94.638,
@@ -93,6 +96,7 @@ class ViT_L_32_Weights(WeightsEnum):
     **_COMMON_META,
     "num_params": 306535400,
     "size": (224, 224),
+    "min_size": (224, 224),
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32",
     "acc@1": 76.972,
     "acc@5": 93.07,
