diff --git a/cmake/iOS.cmake b/cmake/iOS.cmake
index d42ea4c9232..935c57f11b9 100644
--- a/cmake/iOS.cmake
+++ b/cmake/iOS.cmake
@@ -10,11 +10,11 @@
 # SIMULATOR - used to build for the Simulator platforms, which have an x86 arch.
 #
 # CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
-# By default this location is automatcially chosen based on the IOS_PLATFORM value above.
+# By default this location is automatically chosen based on the IOS_PLATFORM value above.
 # If set manually, it will override the default location and force the user of a particular Developer Platform
 #
 # CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
-# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
+# By default this location is automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
 # In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path.
 # If set manually, this will force the use of a specific SDK version
 
@@ -100,7 +100,7 @@ if(IOS_DEPLOYMENT_TARGET)
   set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
 endif()
 
-# Hidden visibilty is required for cxx on iOS
+# Hidden visibility is required for cxx on iOS
 set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}")
 set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
 
diff --git a/docs/source/models/fcos.rst b/docs/source/models/fcos.rst
index 4673d312e21..085f26549b8 100644
--- a/docs/source/models/fcos.rst
+++ b/docs/source/models/fcos.rst
@@ -12,7 +12,7 @@ Model builders
 --------------
 
 The following model builders can be used to instantiate a FCOS model, with or
-without pre-trained weights. All the model buidlers internally rely on the
+without pre-trained weights. All the model builders internally rely on the
 ``torchvision.models.detection.fcos.FCOS`` base class. Please refer to the `source
 code <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/fcos.py>`_ for
 more details about this class.
diff --git a/docs/source/models/retinanet.rst b/docs/source/models/retinanet.rst
index 8613ae9aaab..910692ef3a5 100644
--- a/docs/source/models/retinanet.rst
+++ b/docs/source/models/retinanet.rst
@@ -12,7 +12,7 @@ Model builders
 --------------
 
 The following model builders can be used to instantiate a RetinaNet model, with or
-without pre-trained weights. All the model buidlers internally rely on the
+without pre-trained weights. All the model builders internally rely on the
 ``torchvision.models.detection.retinanet.RetinaNet`` base class. Please refer to the `source
 code <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/retinanet.py>`_ for
 more details about this class.
diff --git a/docs/source/models/vgg.rst b/docs/source/models/vgg.rst
index a9fa9aabfb1..77b5686927c 100644
--- a/docs/source/models/vgg.rst
+++ b/docs/source/models/vgg.rst
@@ -11,7 +11,7 @@ Model builders
 --------------
 
 The following model builders can be used to instantiate a VGG model, with or
-without pre-trained weights. All the model buidlers internally rely on the
+without pre-trained weights. All the model builders internally rely on the
 ``torchvision.models.vgg.VGG`` base class. Please refer to the `source
 code <https://github.com/pytorch/vision/blob/main/torchvision/models/vgg.py>`_ for
 more details about this class.
diff --git a/gallery/others/plot_optical_flow.py b/gallery/others/plot_optical_flow.py
index bc734a6e0ec..3ab14493417 100644
--- a/gallery/others/plot_optical_flow.py
+++ b/gallery/others/plot_optical_flow.py
@@ -134,7 +134,7 @@ def preprocess(img1_batch, img2_batch):
 # (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
 # in the model. For more details on the iterative nature of the model, please
 # refer to the `original paper <https://arxiv.org/abs/2003.12039>`_. Here, we
-# are only interested in the final predicted flows (they are the most acccurate
+# are only interested in the final predicted flows (they are the most accurate
 # ones), so we will just retrieve the last item in the list.
 #
 # As described above, a flow is a tensor with dimensions (2, H, W) (or (N, 2, H,
@@ -151,7 +151,7 @@ def preprocess(img1_batch, img2_batch):
 # %%
 # Visualizing predicted flows
 # ---------------------------
-# Torchvision provides the :func:`~torchvision.utils.flow_to_image` utlity to
+# Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
 # convert a flow into an RGB image. It also supports batches of flows.
 # each "direction" in the flow will be mapped to a given RGB color. In the
 # images below, pixels with similar colors are assumed by the model to be moving
diff --git a/gallery/v2_transforms/plot_custom_transforms.py b/gallery/v2_transforms/plot_custom_transforms.py
index ababcd4968b..912ddf323ff 100644
--- a/gallery/v2_transforms/plot_custom_transforms.py
+++ b/gallery/v2_transforms/plot_custom_transforms.py
@@ -84,7 +84,7 @@ def forward(self, img, bboxes, label): # we assume inputs are always structured
 # In the section above, we have assumed that you already know the structure of
 # your inputs and that you're OK with hard-coding this expected structure in
 # your code. If you want your custom transforms to be as flexible as possible,
-# this can be a bit limitting.
+# this can be a bit limiting.
 #
 # A key feature of the builtin Torchvision V2 transforms is that they can accept
 # arbitrary input structure and return the same structure as output (with
diff --git a/test/test_models.py b/test/test_models.py
index 67eb2115c85..76bddebefe4 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -1037,7 +1037,7 @@ def test_raft(model_fn, scripted):
     torch.manual_seed(0)
 
     # We need very small images, otherwise the pickle size would exceed the 50KB
-    # As a resut we need to override the correlation pyramid to not downsample
+    # As a result we need to override the correlation pyramid to not downsample
     # too much, otherwise we would get nan values (effective H and W would be
     # reduced to 1)
     corr_block = models.optical_flow.raft.CorrBlock(num_levels=2, radius=2)
diff --git a/torchvision/datapoints/_dataset_wrapper.py b/torchvision/datapoints/_dataset_wrapper.py
index f1e7857264a..3f1c41debf5 100644
--- a/torchvision/datapoints/_dataset_wrapper.py
+++ b/torchvision/datapoints/_dataset_wrapper.py
@@ -37,17 +37,17 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
     * :class:`~torchvision.datasets.CocoDetection`: Instead of returning the target as list of dicts, the wrapper
       returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` (in ``XYXY`` coordinate format),
       ``"masks"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.datapoints``.
-      The original keys are preserved. If ``target_keys`` is ommitted, returns only the values for the
+      The original keys are preserved. If ``target_keys`` is omitted, returns only the values for the
       ``"image_id"``, ``"boxes"``, and ``"labels"``.
     * :class:`~torchvision.datasets.VOCDetection`: The key-value-pairs ``"boxes"`` and ``"labels"`` are added to
       the target and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are
-      preserved. If ``target_keys`` is ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      preserved. If ``target_keys`` is omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY``
       coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBoxes` datapoint.
     * :class:`~torchvision.datasets.Kitti`: Instead returning the target as list of dicts, the wrapper returns a
       dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data
       in the corresponding ``torchvision.datapoints``. The original keys are preserved. If ``target_keys`` is
-      ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.OxfordIIITPet`: The target for ``target_type="segmentation"`` is wrapped into a
       :class:`~torchvision.datapoints.Mask` datapoint.
     * :class:`~torchvision.datasets.Cityscapes`: The target for ``target_type="semantic"`` is wrapped into a
diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index b07161d277c..c180e2e1eb8 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -796,7 +796,7 @@ def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
         # in order to extract disparity from depth maps
         camera_settings_path = Path(file_path).parent / "_camera_settings.json"
         with open(camera_settings_path, "r") as f:
-            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constatnt)
+            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
             intrinsics = json.load(f)
             focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
             baseline, pixel_constant = 6, 100  # pixel constant is inverted
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
index 1cdcb267d73..0107c82019b 100644
--- a/torchvision/io/video_reader.py
+++ b/torchvision/io/video_reader.py
@@ -91,14 +91,14 @@ class VideoReader:
 
     Each stream descriptor consists of two parts: stream type (e.g. 'video') and
     a unique stream id (which are determined by the video encoding).
-    In this way, if the video contaner contains multiple
+    In this way, if the video container contains multiple
     streams of the same type, users can access the one they want.
     If only stream type is passed, the decoder auto-detects first stream of that type.
 
     Args:
         src (string, bytes object, or tensor): The media source.
             If string-type, it must be a file path supported by FFMPEG.
-            If bytes should be an in memory representatin of a file supported by FFMPEG.
+            If bytes, should be an in-memory representation of a file supported by FFMPEG.
             If Tensor, it is interpreted internally as byte buffer.
             It must be one-dimensional, of type ``torch.uint8``.
 
@@ -279,7 +279,7 @@ def set_current_stream(self, stream: str) -> bool:
         Currently available stream types include ``['video', 'audio']``.
         Each descriptor consists of two parts: stream type
         (e.g. 'video') and a unique stream id (which are determined by video encoding).
-        In this way, if the video contaner contains multiple
+        In this way, if the video container contains multiple
         streams of the same type, users can access the one they want.
         If only stream type is passed, the decoder auto-detects first stream of that
        type and returns it.
diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py
index 0be62ae8a12..a442b2d4be0 100644
--- a/torchvision/transforms/v2/_geometry.py
+++ b/torchvision/transforms/v2/_geometry.py
@@ -1023,7 +1023,7 @@ class ElasticTransform(Transform):
 
     .. note::
         Implementation to transform bounding boxes is approximative (not exact).
-        We construct an approximation of the inverse grid as ``inverse_grid = idenity - displacement``.
+        We construct an approximation of the inverse grid as ``inverse_grid = identity - displacement``.
         This is not an exact inverse of the grid used to transform images, i.e. ``grid = identity + displacement``.
         Our assumption is that ``displacement * displacement`` is small and can be ignored.
         Large displacements would lead to large errors in the approximation.
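Aside on the last hunk, not part of the patch itself: the ``idenity``/``identity`` fix sits in a note explaining that bounding-box support in ``ElasticTransform`` uses the approximate inverse grid ``inverse_grid = identity - displacement``. If it helps to see why that approximation holds for small displacements, below is a minimal numerical sketch; the 1-D coordinate grid and the sinusoidal displacement field are invented for illustration and do not correspond to any torchvision API.

# Illustrative sketch only (assumes PyTorch is installed).
# Forward warp:        y = x + d(x)
# Approximate inverse: x_hat = y - d(y)
# The recovery error shrinks roughly quadratically as the displacement shrinks,
# which is the "displacement * displacement is small" assumption in the note.
import torch


def displacement(x, amplitude):
    # A smooth, periodic displacement field, chosen arbitrarily for the demo.
    return amplitude * torch.sin(2 * torch.pi * x)


x = torch.linspace(0.0, 1.0, steps=1001)
for amplitude in (0.1, 0.01, 0.001):
    y = x + displacement(x, amplitude)       # forward mapping of the grid
    x_hat = y - displacement(y, amplitude)   # approximate inverse from the note
    max_err = (x - x_hat).abs().max().item()
    print(f"amplitude={amplitude:g}  max recovery error={max_err:.2e}")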