Porting docs, examples, tutorials and galleries #5620

Merged: 13 commits, Mar 15, 2022

13 changes: 10 additions & 3 deletions android/test_app/make_assets.py
@@ -1,11 +1,18 @@
 import torch
-import torchvision
 from torch.utils.mobile_optimizer import optimize_for_mobile
+from torchvision.models.detection import (
+    fasterrcnn_mobilenet_v3_large_320_fpn,
+    FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
+)

 print(torch.__version__)

-model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(
-    pretrained=True, box_score_thresh=0.7, rpn_post_nms_top_n_test=100, rpn_score_thresh=0.4, rpn_pre_nms_top_n_test=150
+model = fasterrcnn_mobilenet_v3_large_320_fpn(
+    weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
+    box_score_thresh=0.7,
+    rpn_post_nms_top_n_test=100,
+    rpn_score_thresh=0.4,
+    rpn_pre_nms_top_n_test=150,
 )

 model.eval()
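Reviewer note: for anyone following along, the weights enum used above carries more than a checkpoint URL. A minimal sketch of what it exposes, assuming the prototype multi-weights API this PR builds on:

    from torchvision.models.detection import FasterRCNN_MobileNet_V3_Large_320_FPN_Weights

    weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT
    print(weights.meta["categories"][:5])  # label names bundled with the checkpoint
    preprocess = weights.transforms()      # the matching inference transforms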
2 changes: 1 addition & 1 deletion examples/cpp/hello_world/trace_model.py
@@ -6,7 +6,7 @@
 HERE = osp.dirname(osp.abspath(__file__))
 ASSETS = osp.dirname(osp.dirname(HERE))

-model = torchvision.models.resnet18(pretrained=False)
+model = torchvision.models.resnet18()
 model.eval()

 traced_model = torch.jit.script(model)
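Note: dropping the argument entirely is the new spelling of pretrained=False. A one-line sketch of the equivalence, my reading of the convention rather than part of this diff:

    import torchvision

    # weights=None is the default and gives randomly initialized parameters,
    # i.e. the old pretrained=False behaviour
    model = torchvision.models.resnet18(weights=None)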
34 changes: 15 additions & 19 deletions gallery/plot_optical_flow.py
@@ -19,7 +19,6 @@
 import torch
 import matplotlib.pyplot as plt
 import torchvision.transforms.functional as F
-import torchvision.transforms as T


 plt.rcParams["savefig.bbox"] = "tight"
@@ -88,24 +87,19 @@ def plot(imgs, **imshow_kwargs):
 # reduce the image sizes for the example to run faster. Image dimension must be
 # divisible by 8.

+from torchvision.models.optical_flow import Raft_Large_Weights
+
-def preprocess(batch):
-    transforms = T.Compose(
-        [
-            T.ConvertImageDtype(torch.float32),
-            T.Normalize(mean=0.5, std=0.5),  # map [0, 1] into [-1, 1]
-            T.Resize(size=(520, 960)),
-        ]
-    )
-    batch = transforms(batch)
-    return batch
+weights = Raft_Large_Weights.DEFAULT
+transforms = weights.transforms()


-# If you can, run this example on a GPU, it will be a lot faster.
-device = "cuda" if torch.cuda.is_available() else "cpu"
+def preprocess(img1_batch, img2_batch):
+    img1_batch = F.resize(img1_batch, size=[520, 960])
+    img2_batch = F.resize(img2_batch, size=[520, 960])
+    return transforms(img1_batch, img2_batch)[:2]


-img1_batch = preprocess(img1_batch).to(device)
-img2_batch = preprocess(img2_batch).to(device)
+img1_batch, img2_batch = preprocess(img1_batch, img2_batch)

 print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")

@@ -121,7 +115,10 @@ def preprocess(batch):

 from torchvision.models.optical_flow import raft_large

-model = raft_large(pretrained=True, progress=False).to(device)
+# If you can, run this example on a GPU, it will be a lot faster.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
 model = model.eval()

 list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
@@ -182,10 +179,9 @@ def preprocess(batch):
 # from torchvision.io import write_jpeg
 # for i, (img1, img2) in enumerate(zip(frames, frames[1:])):
 #     # Note: it would be faster to predict batches of flows instead of individual flows
-#     img1 = preprocess(img1[None]).to(device)
-#     img2 = preprocess(img2[None]).to(device)
+#     img1, img2 = preprocess(img1, img2)

-#     list_of_flows = model(img1_batch, img2_batch)
+#     list_of_flows = model(img1.to(device), img2.to(device))
 #     predicted_flow = list_of_flows[-1][0]
 #     flow_img = flow_to_image(predicted_flow).to("cpu")
 #     output_folder = "/tmp/"  # Update this to the folder of your choice
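Note on the new preprocessing: the RAFT preset consumes both frame batches at once and maps them from uint8 in [0, 255] to float in [-1, 1], which is why the old Compose pipeline disappears. A self-contained sketch on random data, assuming the prototype transforms behave as used above:

    import torch
    from torchvision.models.optical_flow import Raft_Large_Weights

    weights = Raft_Large_Weights.DEFAULT
    transforms = weights.transforms()

    img1 = torch.randint(0, 256, (2, 3, 520, 960), dtype=torch.uint8)
    img2 = torch.randint(0, 256, (2, 3, 520, 960), dtype=torch.uint8)
    img1, img2 = transforms(img1, img2)[:2]  # [:2] presumably drops the optional flow/valid-mask slots
    print(img1.dtype, img1.min().item(), img1.max().item())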
8 changes: 5 additions & 3 deletions gallery/plot_repurposing_annotations.py
@@ -139,12 +139,14 @@ def show(imgs):
 # Here is demo with a Faster R-CNN model loaded from
 # :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`

-from torchvision.models.detection import fasterrcnn_resnet50_fpn
+from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

-model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False)
+weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+model = fasterrcnn_resnet50_fpn(weights=weights, progress=False)
 print(img.size())

-img = F.convert_image_dtype(img, torch.float)
+transforms = weights.transforms()
+img, _ = transforms(img)
 target = {}
 target["boxes"] = boxes
 target["labels"] = labels = torch.ones((masks.size(0),), dtype=torch.int64)
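One thing the img, _ = transforms(img) pattern relies on: the detection presets accept an optional target and hand back an (image, target) pair, so the gallery unpacks and discards the second element. A short sketch of that calling convention, my paraphrase of the prototype presets rather than code from this PR:

    import torch
    from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

    transforms = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.transforms()
    img_uint8 = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)

    img_float, target = transforms(img_uint8)  # target comes back as None when not supplied
    print(img_float.dtype, target)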
12 changes: 4 additions & 8 deletions gallery/plot_scripted_tensor_transforms.py
@@ -85,20 +85,16 @@ def show(imgs):
 # Let's define a ``Predictor`` module that transforms the input tensor and then
 # applies an ImageNet model on it.

-from torchvision.models import resnet18
+from torchvision.models import resnet18, ResNet18_Weights


 class Predictor(nn.Module):

     def __init__(self):
         super().__init__()
-        self.resnet18 = resnet18(pretrained=True, progress=False).eval()
-        self.transforms = nn.Sequential(
-            T.Resize([256, ]),  # We use single int value inside a list due to torchscript type restrictions
-            T.CenterCrop(224),
-            T.ConvertImageDtype(torch.float),
-            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-        )
+        weights = ResNet18_Weights.DEFAULT
+        self.resnet18 = resnet18(weights=weights, progress=False).eval()
+        self.transforms = weights.transforms()

     def forward(self, x: torch.Tensor) -> torch.Tensor:
         with torch.no_grad():
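Since this gallery is about TorchScript, the swap deserves one remark: the classification preset returned by weights.transforms() is itself an nn.Module, so the Predictor should stay scriptable. A brief sketch, assuming the Predictor class from the diff above:

    predictor = Predictor().eval()
    scripted_predictor = torch.jit.script(predictor)  # the preset scripts along with the model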
39 changes: 27 additions & 12 deletions gallery/plot_visualization_utils.py
@@ -73,14 +73,17 @@ def show(imgs):
 # :func:`~torchvision.models.detection.ssd300_vgg16`. For more details
 # on the output of such models, you may refer to :ref:`instance_seg_output`.

-from torchvision.models.detection import fasterrcnn_resnet50_fpn
-from torchvision.transforms.functional import convert_image_dtype
+from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights


 batch_int = torch.stack([dog1_int, dog2_int])
-batch = convert_image_dtype(batch_int, dtype=torch.float)

-model = fasterrcnn_resnet50_fpn(pretrained=True, progress=False)
+weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
+transforms = weights.transforms()
+
+batch, _ = transforms(batch_int)
+
+model = fasterrcnn_resnet50_fpn(weights=weights, progress=False)
 model = model.eval()

 outputs = model(batch)
@@ -120,13 +123,15 @@ def show(imgs):
 # images must be normalized before they're passed to a semantic segmentation
 # model.

-from torchvision.models.segmentation import fcn_resnet50
+from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
+
+weights = FCN_ResNet50_Weights.DEFAULT
+transforms = weights.transforms(resize_size=None)

-model = fcn_resnet50(pretrained=True, progress=False)
+model = fcn_resnet50(weights=weights, progress=False)
 model = model.eval()

-normalized_batch = F.normalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+normalized_batch, _ = transforms(batch)
 output = model(normalized_batch)['out']
 print(output.shape, output.min().item(), output.max().item())

@@ -262,8 +267,14 @@ def show(imgs):
 # of them may not have masks, like
 # :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`.

-from torchvision.models.detection import maskrcnn_resnet50_fpn
-model = maskrcnn_resnet50_fpn(pretrained=True, progress=False)
+from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
+
+weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
+transforms = weights.transforms()
+
+batch, _ = transforms(batch_int)
+
+model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
 model = model.eval()

 output = model(batch)
@@ -378,13 +389,17 @@ def show(imgs):
 # Note that the keypoint detection model does not need normalized images.
 #

-from torchvision.models.detection import keypointrcnn_resnet50_fpn
+from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
 from torchvision.io import read_image

 person_int = read_image(str(Path("assets") / "person1.jpg"))
-person_float = convert_image_dtype(person_int, dtype=torch.float)

-model = keypointrcnn_resnet50_fpn(pretrained=True, progress=False)
+weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
+transforms = weights.transforms()
+
+person_float, _ = transforms(person_int)
+
+model = keypointrcnn_resnet50_fpn(weights=weights, progress=False)
 model = model.eval()

 outputs = model([person_float])
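A follow-up that the weights enums make possible: predicted label ids can be decoded with the metadata carried by the weights instead of a hard-coded COCO list. A hedged sketch, assuming weights and outputs from the detection snippet above and that the meta dict exposes a "categories" list:

    categories = weights.meta["categories"]
    names = [categories[int(label)] for label in outputs[0]["labels"]]
    print(names[:5])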
13 changes: 10 additions & 3 deletions ios/VisionTestApp/make_assets.py
@@ -1,11 +1,18 @@
 import torch
-import torchvision
 from torch.utils.mobile_optimizer import optimize_for_mobile
+from torchvision.models.detection import (
+    fasterrcnn_mobilenet_v3_large_320_fpn,
+    FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
+)

 print(torch.__version__)

-model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(
-    pretrained=True, box_score_thresh=0.7, rpn_post_nms_top_n_test=100, rpn_score_thresh=0.4, rpn_pre_nms_top_n_test=150
+model = fasterrcnn_mobilenet_v3_large_320_fpn(
+    weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT,
+    box_score_thresh=0.7,
+    rpn_post_nms_top_n_test=100,
+    rpn_score_thresh=0.4,
+    rpn_pre_nms_top_n_test=150,
 )

 model.eval()
2 changes: 1 addition & 1 deletion test/tracing/frcnn/trace_model.py
@@ -6,7 +6,7 @@
 HERE = osp.dirname(osp.abspath(__file__))
 ASSETS = osp.dirname(osp.dirname(HERE))

-model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
+model = torchvision.models.detection.fasterrcnn_resnet50_fpn()
 model.eval()

 traced_model = torch.jit.script(model)
2 changes: 1 addition & 1 deletion torchvision/models/_utils.py
@@ -32,7 +32,7 @@ class IntermediateLayerGetter(nn.ModuleDict):

     Examples::

-        >>> m = torchvision.models.resnet18(pretrained=True)
+        >>> m = torchvision.models.resnet18(weights=ResNet18_Weights.DEFAULT)
         >>> # extract layer1 and layer3, giving as names `feat1` and `feat2`
         >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
         >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
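To make the updated docstring example concrete, here is a runnable version; the printed shapes are what I would expect from a standard ResNet-18, so verify locally:

    import torch
    import torchvision
    from torchvision.models import ResNet18_Weights

    m = torchvision.models.resnet18(weights=ResNet18_Weights.DEFAULT)
    new_m = torchvision.models._utils.IntermediateLayerGetter(
        m, {"layer1": "feat1", "layer3": "feat2"}
    )
    out = new_m(torch.rand(1, 3, 224, 224))
    print([(k, v.shape) for k, v in out.items()])
    # expected: [('feat1', torch.Size([1, 64, 56, 56])), ('feat2', torch.Size([1, 256, 14, 14]))]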
27 changes: 18 additions & 9 deletions torchvision/models/detection/backbone_utils.py
@@ -6,7 +6,8 @@
 from torchvision.ops.feature_pyramid_network import ExtraFPNBlock, FeaturePyramidNetwork, LastLevelMaxPool

 from .. import mobilenet, resnet
-from .._utils import IntermediateLayerGetter
+from .._api import WeightsEnum
+from .._utils import IntermediateLayerGetter, handle_legacy_interface


 class BackboneWithFPN(nn.Module):
@@ -55,9 +56,13 @@ def forward(self, x: Tensor) -> Dict[str, Tensor]:
         return x


+@handle_legacy_interface(
+    weights=("pretrained", True),  # type: ignore[arg-type]
+)
 def resnet_fpn_backbone(
+    *,
     backbone_name: str,
-    pretrained: bool,
+    weights: Optional[WeightsEnum],
     norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
     trainable_layers: int = 3,
     returned_layers: Optional[List[int]] = None,
@@ -69,7 +74,7 @@ def resnet_fpn_backbone(
     Examples::

         >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
-        >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
+        >>> backbone = resnet_fpn_backbone('resnet50', weights=ResNet50_Weights.DEFAULT, trainable_layers=3)
        >>> # get some dummy image
         >>> x = torch.rand(1,3,64,64)
         >>> # compute the output
     Args:
         backbone_name (string): resnet architecture. Possible values are 'resnet18', 'resnet34', 'resnet50',
             'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
-        pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet
+        weights (WeightsEnum, optional): The pretrained weights for the model
         norm_layer (callable): it is recommended to use the default value. For details visit:
             (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
         trainable_layers (int): number of trainable (not frozen) layers starting from final block.
@@ -98,7 +103,7 @@ def resnet_fpn_backbone(
             a new list of feature maps and their corresponding names. By
             default a ``LastLevelMaxPool`` is used.
     """
-    backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+    backbone = resnet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
     return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks)


@@ -135,13 +140,13 @@ def _resnet_fpn_extractor(


 def _validate_trainable_layers(
-    pretrained: bool,
+    is_trained: bool,
     trainable_backbone_layers: Optional[int],
     max_value: int,
     default_value: int,
 ) -> int:
     # don't freeze any layers if pretrained model or backbone is not used
-    if not pretrained:
+    if not is_trained:
         if trainable_backbone_layers is not None:
             warnings.warn(
                 "Changing trainable_backbone_layers has not effect if "
@@ -160,16 +165,20 @@ def _validate_trainable_layers(
     return trainable_backbone_layers


+@handle_legacy_interface(
+    weights=("pretrained", True),  # type: ignore[arg-type]
+)
 def mobilenet_backbone(
+    *,
     backbone_name: str,
-    pretrained: bool,
+    weights: Optional[WeightsEnum],
     fpn: bool,
     norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
     trainable_layers: int = 2,
     returned_layers: Optional[List[int]] = None,
     extra_blocks: Optional[ExtraFPNBlock] = None,
 ) -> nn.Module:
-    backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+    backbone = mobilenet.__dict__[backbone_name](weights=weights, norm_layer=norm_layer)
     return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks)
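For readers unfamiliar with the decorator introduced here: handle_legacy_interface keeps the old keyword working by translating it into the new one before the builder runs, typically with a deprecation warning. A rough sketch of the effect as I read this diff, not the actual implementation:

    from torchvision.models import ResNet50_Weights
    from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

    # a legacy call like this still works after the PR...
    backbone = resnet_fpn_backbone(backbone_name="resnet50", pretrained=True)
    # ...because the decorator rewrites it to roughly the equivalent of
    # (assumption: legacy True resolves to the enum's default entry):
    backbone = resnet_fpn_backbone(backbone_name="resnet50", weights=ResNet50_Weights.DEFAULT)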
8 changes: 4 additions & 4 deletions torchvision/models/detection/faster_rcnn.py
@@ -117,7 +117,7 @@ class FasterRCNN(GeneralizedRCNN):
         >>> from torchvision.models.detection.rpn import AnchorGenerator
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # FasterRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -415,7 +415,7 @@ def fasterrcnn_resnet50_fpn(

     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
         >>> # For training
         >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
         >>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
@@ -532,7 +532,7 @@ def fasterrcnn_mobilenet_v3_large_320_fpn(

     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
@@ -589,7 +589,7 @@ def fasterrcnn_mobilenet_v3_large_fpn(

     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
+        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
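Closing note on the detection builders: besides weights for the full detector, the new signatures also take weights_backbone, which covers the common case of training a detector from an ImageNet-pretrained trunk. A hedged sketch, assuming fasterrcnn_resnet50_fpn exposes weights_backbone like the other ported builders:

    from torchvision.models import ResNet50_Weights
    from torchvision.models.detection import fasterrcnn_resnet50_fpn

    # no detection weights, but a pretrained ResNet-50 backbone, for fine-tuning
    model = fasterrcnn_resnet50_fpn(
        weights=None,
        weights_backbone=ResNet50_Weights.DEFAULT,
        num_classes=5,
    )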