
Commit 6325494

Merge branch 'main' into issue-6538-support-cmyk
2 parents 54da7b2 + 9040793


42 files changed: 1148 additions & 845 deletions

docs/source/datapoints.rst

Lines changed: 2 additions & 0 deletions

@@ -18,3 +18,5 @@ see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.
     BoundingBoxes
     Mask
     Datapoint
+    set_return_type
+    wrap
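
The two entries added here, :func:`~torchvision.datapoints.wrap` and :func:`~torchvision.datapoints.set_return_type`, are exercised by the gallery changes below. A minimal sketch of how they fit together, with illustrative box values (the ``format``/``canvas_size`` constructor arguments follow the v2 gallery):

from torchvision import datapoints

bboxes = datapoints.BoundingBoxes(
    [[10, 10, 50, 50]], format="XYXY", canvas_size=(100, 100)
)

# wrap(): re-wrap a plain tensor, copying metadata from an existing datapoint.
shifted = datapoints.wrap(bboxes + 3, like=bboxes)
assert isinstance(shifted, datapoints.BoundingBoxes)

# set_return_type(): make native tensor ops return datapoints, here as a context manager.
with datapoints.set_return_type("datapoint"):
    assert isinstance(bboxes + 3, datapoints.BoundingBoxes)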

docs/source/transforms.rst

Lines changed: 1 addition & 2 deletions

@@ -228,12 +228,11 @@ Conversion
 
     ToPILImage
     v2.ToPILImage
-    v2.ToImagePIL
     ToTensor
     v2.ToTensor
     PILToTensor
     v2.PILToTensor
-    v2.ToImageTensor
+    v2.ToImage
     ConvertImageDtype
     v2.ConvertImageDtype
     v2.ToDtype
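
This hunk renames ``v2.ToImageTensor`` to ``v2.ToImage`` and drops ``v2.ToImagePIL`` in favor of the already-listed ``v2.ToPILImage``. A sketch of a conversion pipeline under the new name, mirroring the pattern the reference scripts in this commit switch to:

import torch
from torchvision.transforms import v2

transform = v2.Compose(
    [
        v2.ToImage(),  # formerly v2.ToImageTensor(): PIL image / ndarray / tensor -> datapoints.Image
        v2.ConvertImageDtype(torch.float32),  # e.g. uint8 [0, 255] -> float32 [0.0, 1.0]
    ]
)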

gallery/plot_custom_datapoints.py

Lines changed: 7 additions & 11 deletions

@@ -3,7 +3,7 @@
 How to write your own Datapoint class
 =====================================
 
-This guide is intended for downstream library maintainers. We explain how to
+This guide is intended for advanced users and downstream library maintainers. We explain how to
 write your own datapoint class, and how to make it compatible with the built-in
 Torchvision v2 transforms. Before continuing, make sure you have read
 :ref:`sphx_glr_auto_examples_plot_datapoints.py`.

@@ -49,28 +49,24 @@ class MyDatapoint(datapoints.Datapoint):
 from torchvision.transforms.v2 import functional as F
 
 
-@F.register_kernel(dispatcher="hflip", datapoint_cls=MyDatapoint)
+@F.register_kernel(functional="hflip", datapoint_cls=MyDatapoint)
 def hflip_my_datapoint(my_dp, *args, **kwargs):
     print("Flipping!")
     out = my_dp.flip(-1)
-    return MyDatapoint.wrap_like(my_dp, out)
+    return datapoints.wrap(out, like=my_dp)
 
 
 # %%
-# To understand why ``wrap_like`` is used, see
+# To understand why :func:`~torchvision.datapoints.wrap` is used, see
 # :ref:`datapoint_unwrapping_behaviour`. Ignore the ``*args, **kwargs`` for now,
 # we will explain it below in :ref:`param_forwarding`.
 #
 # .. note::
 #
 #    In our call to ``register_kernel`` above we used a string
-#    ``dispatcher="hflip"`` to refer to the functional we want to hook into. We
+#    ``functional="hflip"`` to refer to the functional we want to hook into. We
 #    could also have used the functional *itself*, i.e.
-#    ``@register_kernel(dispatcher=F.hflip, ...)``.
-#
-#    The functionals that you can be hooked into are the ones in
-#    ``torchvision.transforms.v2.functional`` and they are documented in
-#    :ref:`functional_transforms`.
+#    ``@register_kernel(functional=F.hflip, ...)``.
 #
 # Now that we have registered our kernel, we can call the functional API on a
 # ``MyDatapoint`` instance:

@@ -111,7 +107,7 @@ def hflip_my_datapoint(my_dp, *args, **kwargs):
 def hflip_my_datapoint(my_dp):  # noqa
     print("Flipping!")
     out = my_dp.flip(-1)
-    return MyDatapoint.wrap_like(my_dp, out)
+    return datapoints.wrap(out, like=my_dp)
 
 
 # %%
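
Assembled from the pieces this file touches, the registration flow reads as follows. A sketch assuming the surrounding gallery code, where ``MyDatapoint`` is a bare ``datapoints.Datapoint`` subclass:

import torch
from torchvision import datapoints
from torchvision.transforms.v2 import functional as F


class MyDatapoint(datapoints.Datapoint):
    pass


@F.register_kernel(functional="hflip", datapoint_cls=MyDatapoint)
def hflip_my_datapoint(my_dp, *args, **kwargs):
    print("Flipping!")
    out = my_dp.flip(-1)
    return datapoints.wrap(out, like=my_dp)


my_dp = MyDatapoint(torch.rand(3, 256, 256))
_ = F.hflip(my_dp)  # dispatches to the kernel registered above and prints "Flipping!"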

gallery/plot_datapoints.py

Lines changed: 64 additions & 35 deletions

@@ -48,26 +48,22 @@
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
+# :mod:`torchvision.datapoints` supports four types of datapoints:
+#
+# * :class:`~torchvision.datapoints.Image`
+# * :class:`~torchvision.datapoints.Video`
+# * :class:`~torchvision.datapoints.BoundingBoxes`
+# * :class:`~torchvision.datapoints.Mask`
+#
 # What can I do with a datapoint?
 # -------------------------------
 #
 # Datapoints look and feel just like regular tensors - they **are** tensors.
 # Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()`` or
-# any ``torch.*`` operator will also works on datapoints. See
+# any ``torch.*`` operator will also work on datapoints. See
 # :ref:`datapoint_unwrapping_behaviour` for a few gotchas.
 
 # %%
-#
-# What datapoints are supported?
-# ------------------------------
-#
-# So far :mod:`torchvision.datapoints` supports four types of datapoints:
-#
-# * :class:`~torchvision.datapoints.Image`
-# * :class:`~torchvision.datapoints.Video`
-# * :class:`~torchvision.datapoints.BoundingBoxes`
-# * :class:`~torchvision.datapoints.Mask`
-#
 # .. _datapoint_creation:
 #
 # How do I construct a datapoint?

@@ -111,26 +107,23 @@
 print(bboxes)
 
 # %%
-# Using the ``wrap_like()`` class method
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# Using ``datapoints.wrap()``
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-# You can also use the ``wrap_like()`` class method to wrap a tensor object
+# You can also use the :func:`~torchvision.datapoints.wrap` function to wrap a tensor object
 # into a datapoint. This is useful when you already have an object of the
 # desired type, which typically happens when writing transforms: you just want
-# to wrap the output like the input. This API is inspired by utils like
-# :func:`torch.zeros_like`:
+# to wrap the output like the input.
 
 new_bboxes = torch.tensor([0, 20, 30, 40])
-new_bboxes = datapoints.BoundingBoxes.wrap_like(bboxes, new_bboxes)
+new_bboxes = datapoints.wrap(new_bboxes, like=bboxes)
 assert isinstance(new_bboxes, datapoints.BoundingBoxes)
 assert new_bboxes.canvas_size == bboxes.canvas_size
 
 
 # %%
 # The metadata of ``new_bboxes`` is the same as ``bboxes``, but you could pass
-# it as a parameter to override it. Check the
-# :meth:`~torchvision.datapoints.BoundingBoxes.wrap_like` documentation for
-# more details.
+# it as a parameter to override it.
 #
 # Do I have to wrap the output of the datasets myself?
 # ----------------------------------------------------
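
The shortened comment still promises that metadata can be overridden through a parameter. A sketch of that override; the keyword name (``canvas_size``) is an assumption based on the attribute asserted above:

import torch
from torchvision import datapoints

bboxes = datapoints.BoundingBoxes(
    [[0, 10, 30, 40]], format="XYXY", canvas_size=(100, 100)
)

# Default: wrap() copies the metadata of `like`.
same = datapoints.wrap(torch.tensor([[0, 20, 30, 40]]), like=bboxes)
assert same.canvas_size == (100, 100)

# Assumed override: pass the metadata field as a keyword to replace it.
bigger = datapoints.wrap(
    torch.tensor([[0, 40, 60, 80]]), like=bboxes, canvas_size=(200, 200)
)
assert bigger.canvas_size == (200, 200)
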
@@ -209,42 +202,78 @@ def get_transform(train):
 # I had a Datapoint but now I have a Tensor. Help!
 # ------------------------------------------------
 #
-# For a lot of operations involving datapoints, we cannot safely infer whether
-# the result should retain the datapoint type, so we choose to return a plain
-# tensor instead of a datapoint (this might change, see note below):
+# By default, operations on :class:`~torchvision.datapoints.Datapoint` objects
+# will return a pure Tensor:
 
 
 assert isinstance(bboxes, datapoints.BoundingBoxes)
 
 # Shift bboxes by 3 pixels in both H and W
 new_bboxes = bboxes + 3
 
-assert isinstance(new_bboxes, torch.Tensor) and not isinstance(new_bboxes, datapoints.BoundingBoxes)
+assert isinstance(new_bboxes, torch.Tensor)
+assert not isinstance(new_bboxes, datapoints.BoundingBoxes)
+
+# %%
+# .. note::
+#
+#    This behavior only affects native ``torch`` operations. If you are using
+#    the built-in ``torchvision`` transforms or functionals, you will always get
+#    as output the same type that you passed as input (pure ``Tensor`` or
+#    ``Datapoint``).
 
 # %%
-# If you're writing your own custom transforms or code involving datapoints, you
-# can re-wrap the output into a datapoint by just calling their constructor, or
-# by using the ``.wrap_like()`` class method:
+# But I want a Datapoint back!
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+#
+# You can re-wrap a pure tensor into a datapoint by just calling the datapoint
+# constructor, or by using the :func:`~torchvision.datapoints.wrap` function
+# (see more details above in :ref:`datapoint_creation`):
 
 new_bboxes = bboxes + 3
-new_bboxes = datapoints.BoundingBoxes.wrap_like(bboxes, new_bboxes)
+new_bboxes = datapoints.wrap(new_bboxes, like=bboxes)
 assert isinstance(new_bboxes, datapoints.BoundingBoxes)
 
 # %%
-# See more details above in :ref:`datapoint_creation`.
+# Alternatively, you can use the :func:`~torchvision.datapoints.set_return_type`
+# as a global config setting for the whole program, or as a context manager:
+
+with datapoints.set_return_type("datapoint"):
+    new_bboxes = bboxes + 3
+assert isinstance(new_bboxes, datapoints.BoundingBoxes)
+
+# %%
+# Why is this happening?
+# ^^^^^^^^^^^^^^^^^^^^^^
 #
-# .. note::
+# **For performance reasons**. :class:`~torchvision.datapoints.Datapoint`
+# classes are Tensor subclasses, so any operation involving a
+# :class:`~torchvision.datapoints.Datapoint` object will go through the
+# `__torch_function__
+# <https://pytorch.org/docs/stable/notes/extending.html#extending-torch>`_
+# protocol. This induces a small overhead, which we want to avoid when possible.
+# This doesn't matter for built-in ``torchvision`` transforms because we can
+# avoid the overhead there, but it could be a problem in your model's
+# ``forward``.
 #
-#    You never need to re-wrap manually if you're using the built-in transforms
-#    or their functional equivalents: this is automatically taken care of for
-#    you.
+# **The alternative isn't much better anyway.** For every operation where
+# preserving the :class:`~torchvision.datapoints.Datapoint` type makes
+# sense, there are just as many operations where returning a pure Tensor is
+# preferable: for example, is ``img.sum()`` still an :class:`~torchvision.datapoints.Image`?
+# If we were to preserve :class:`~torchvision.datapoints.Datapoint` types all
+# the way, even model's logits or the output of the loss function would end up
+# being of type :class:`~torchvision.datapoints.Image`, and surely that's not
+# desirable.
 #
 # .. note::
 #
-#    This "unwrapping" behaviour is something we're actively seeking feedback on. If you find this surprising or if you
+#    This behaviour is something we're actively seeking feedback on. If you find this surprising or if you
 #    have any suggestions on how to better support your use-cases, please reach out to us via this issue:
 #    https://github.com/pytorch/vision/issues/7319
 #
+# Exceptions
+# ^^^^^^^^^^
+#
 # There are a few exceptions to this "unwrapping" rule:
 #
 # 1. Operations like :meth:`~torch.Tensor.clone`, :meth:`~torch.Tensor.to`,
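
The new ``Exceptions`` heading introduces the list that continues past this hunk. A sketch of the first exception it names (``clone`` and ``to`` preserve the datapoint type), with an illustrative image:

import torch
from torchvision import datapoints

img = datapoints.Image(torch.rand(3, 32, 32))

# Native arithmetic unwraps to a pure Tensor by default...
assert not isinstance(img + 1, datapoints.Image)

# ...but clone() and to() are among the documented exceptions and keep the type.
assert isinstance(img.clone(), datapoints.Image)
assert isinstance(img.to(torch.float64), datapoints.Image)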

gallery/plot_transforms_v2_e2e.py

Lines changed: 4 additions & 4 deletions

@@ -5,8 +5,8 @@
 
 Object detection is not supported out of the box by ``torchvision.transforms`` v1, since it only supports images.
 ``torchvision.transforms.v2`` enables jointly transforming images, videos, bounding boxes, and masks. This example
-showcases an end-to-end object detection training using the stable ``torchvisio.datasets`` and ``torchvision.models`` as
-well as the new ``torchvision.transforms.v2`` v2 API.
+showcases an end-to-end object detection training using the stable ``torchvision.datasets`` and ``torchvision.models``
+as well as the new ``torchvision.transforms.v2`` v2 API.
 """
 
 import pathlib

@@ -27,7 +27,7 @@ def show(sample):
 
     image, target = sample
     if isinstance(image, PIL.Image.Image):
-        image = F.to_image_tensor(image)
+        image = F.to_image(image)
     image = F.to_dtype(image, torch.uint8, scale=True)
     annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
 

@@ -101,7 +101,7 @@ def load_example_coco_detection_dataset(**kwargs):
         transforms.RandomZoomOut(fill={PIL.Image.Image: (123, 117, 104), "others": 0}),
         transforms.RandomIoUCrop(),
         transforms.RandomHorizontalFlip(),
-        transforms.ToImageTensor(),
+        transforms.ToImage(),
         transforms.ConvertImageDtype(torch.float32),
         transforms.SanitizeBoundingBoxes(),
     ]
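
The renamed functionals in this file compose as in the gallery's ``show`` helper. A sketch with a synthetic PIL image and an assumed XYXY box:

import PIL.Image
import torch
from torchvision.transforms.v2 import functional as F
from torchvision.utils import draw_bounding_boxes

pil_image = PIL.Image.new("RGB", (64, 64))
boxes = torch.tensor([[4, 4, 40, 40]])

image = F.to_image(pil_image)  # formerly F.to_image_tensor
image = F.to_dtype(image, torch.uint8, scale=True)
annotated = draw_bounding_boxes(image, boxes, colors="yellow", width=3)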

packaging/pre_build_script.sh

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@ if [[ "$(uname)" == Darwin ]]; then
     conda install -yq wget
 fi
 
-if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then
+if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" || "$ARCH" == "aarch64" ]]; then
     # Install libpng from Anaconda (defaults)
     conda install libpng -yq
     conda install -yq ffmpeg=4.2 libjpeg-turbo -c pytorch

references/detection/presets.py

Lines changed: 4 additions & 4 deletions

@@ -33,7 +33,7 @@ def __init__(
         transforms = []
         backend = backend.lower()
         if backend == "datapoint":
-            transforms.append(T.ToImageTensor())
+            transforms.append(T.ToImage())
         elif backend == "tensor":
             transforms.append(T.PILToTensor())
         elif backend != "pil":

@@ -71,7 +71,7 @@ def __init__(
 
         if backend == "pil":
             # Note: we could just convert to pure tensors even in v2.
-            transforms += [T.ToImageTensor() if use_v2 else T.PILToTensor()]
+            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
 
         transforms += [T.ConvertImageDtype(torch.float)]
 

@@ -94,11 +94,11 @@ def __init__(self, backend="pil", use_v2=False):
         backend = backend.lower()
         if backend == "pil":
             # Note: we could just convert to pure tensors even in v2?
-            transforms += [T.ToImageTensor() if use_v2 else T.PILToTensor()]
+            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
         elif backend == "tensor":
             transforms += [T.PILToTensor()]
         elif backend == "datapoint":
-            transforms += [T.ToImageTensor()]
+            transforms += [T.ToImage()]
         else:
             raise ValueError(f"backend can be 'datapoint', 'tensor' or 'pil', but got {backend}")
 
references/segmentation/presets.py

Lines changed: 3 additions & 3 deletions

@@ -32,7 +32,7 @@ def __init__(
         transforms = []
         backend = backend.lower()
         if backend == "datapoint":
-            transforms.append(T.ToImageTensor())
+            transforms.append(T.ToImage())
         elif backend == "tensor":
             transforms.append(T.PILToTensor())
         elif backend != "pil":

@@ -81,7 +81,7 @@ def __init__(
         if backend == "tensor":
             transforms += [T.PILToTensor()]
         elif backend == "datapoint":
-            transforms += [T.ToImageTensor()]
+            transforms += [T.ToImage()]
         elif backend != "pil":
             raise ValueError(f"backend can be 'datapoint', 'tensor' or 'pil', but got {backend}")
 

@@ -92,7 +92,7 @@ def __init__(
 
         if backend == "pil":
             # Note: we could just convert to pure tensors even in v2?
-            transforms += [T.ToImageTensor() if use_v2 else T.PILToTensor()]
+            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]
 
         transforms += [
             T.ConvertImageDtype(torch.float),
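
The same three-way backend switch is updated in both the detection and segmentation presets. Condensed, the post-rename pattern looks like the sketch below (the helper name is hypothetical):

from torchvision.transforms import v2 as T


def initial_conversion(backend: str):
    # Hypothetical helper condensing the shared preset logic after the rename.
    backend = backend.lower()
    if backend == "datapoint":
        return T.ToImage()  # formerly T.ToImageTensor()
    elif backend == "tensor":
        return T.PILToTensor()
    elif backend == "pil":
        return None  # keep PIL inputs; conversion happens later in the pipeline
    raise ValueError(f"backend can be 'datapoint', 'tensor' or 'pil', but got {backend}")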

test/common_utils.py

Lines changed: 5 additions & 5 deletions

@@ -27,7 +27,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import datapoints, io
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import to_dtype_image_tensor, to_image_pil, to_image_tensor
+from torchvision.transforms.v2.functional import to_dtype_image, to_image, to_pil_image
 
 
 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])

@@ -293,7 +293,7 @@ def __init__(
         **other_parameters,
     ):
         if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]):
-            actual, expected = [to_image_tensor(input) for input in [actual, expected]]
+            actual, expected = [to_image(input) for input in [actual, expected]]
 
         super().__init__(actual, expected, **other_parameters)
         self.mae = mae

@@ -536,7 +536,7 @@ def make_image_tensor(*args, **kwargs):
 
 
 def make_image_pil(*args, **kwargs):
-    return to_image_pil(make_image(*args, **kwargs))
+    return to_pil_image(make_image(*args, **kwargs))
 
 
 def make_image_loader(

@@ -609,12 +609,12 @@ def fn(shape, dtype, device, memory_format):
             )
         )
 
-        image_tensor = to_image_tensor(image_pil)
+        image_tensor = to_image(image_pil)
         if memory_format == torch.contiguous_format:
             image_tensor = image_tensor.to(device=device, memory_format=memory_format, copy=True)
         else:
             image_tensor = image_tensor.to(device=device)
-        image_tensor = to_dtype_image_tensor(image_tensor, dtype=dtype, scale=True)
+        image_tensor = to_dtype_image(image_tensor, dtype=dtype, scale=True)
 
         return datapoints.Image(image_tensor)
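
The three renamed helpers imported at the top of this file round-trip an image as follows; a sketch with an assumed 3x16x16 uint8 input:

import torch
from torchvision.transforms.v2.functional import to_dtype_image, to_image, to_pil_image

image = torch.randint(0, 256, (3, 16, 16), dtype=torch.uint8)

pil_image = to_pil_image(image)  # formerly to_image_pil
tensor_image = to_image(pil_image)  # formerly to_image_tensor; returns a datapoints.Image
float_image = to_dtype_image(tensor_image, dtype=torch.float32, scale=True)  # formerly to_dtype_image_tensor
assert float_image.max() <= 1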

test/smoke_test.py

Lines changed: 7 additions & 2 deletions

@@ -78,8 +78,13 @@ def smoke_test_torchvision_resnet50_classify(device: str = "cpu") -> None:
 def main() -> None:
     print(f"torchvision: {torchvision.__version__}")
     print(f"torch.cuda.is_available: {torch.cuda.is_available()}")
-    print(f"{torch.ops.image._jpeg_version() = }")
-    assert torch.ops.image._is_compiled_against_turbo()
+
+    # Turn 1.11.0aHASH into 1.11 (major.minor only)
+    version = ".".join(torchvision.__version__.split(".")[:2])
+    if version >= "0.16":
+        print(f"{torch.ops.image._jpeg_version() = }")
+        assert torch.ops.image._is_compiled_against_turbo()
+
     smoke_test_torchvision()
     smoke_test_torchvision_read_decode()
     smoke_test_torchvision_resnet50_classify()
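
The new gate compares ``major.minor`` strings lexicographically, which holds for the versions in play here ("0.15" < "0.16"). A hedged sketch of a numeric comparison that would also survive two-digit minors such as a hypothetical "0.100":

def at_least(version_string: str, major: int, minor: int) -> bool:
    # Turn "0.16.0aHASH" into (0, 16) and compare numerically rather than
    # lexicographically, which would misorder e.g. "0.100" vs "0.16".
    first, second = version_string.split(".")[:2]
    return (int(first), int(second)) >= (major, minor)


assert at_least("0.16.0a0", 0, 16)
assert not at_least("0.15.2", 0, 16)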
