Skip to content

Commit 0d2ad96

Browse files
committed
add batch dimension squashing to some kernels
1 parent 36f52dc commit 0d2ad96

File tree

3 files changed

+95
-5
lines changed

3 files changed

+95
-5
lines changed

torchvision/prototype/transforms/functional/_color.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,25 @@ def adjust_contrast(inpt: features.InputTypeJIT, contrast_factor: float) -> feat
5858

5959

6060
def adjust_sharpness_video(video: torch.Tensor, sharpness_factor: float) -> torch.Tensor:
    """Adjust the sharpness of a video tensor.

    Delegates to the image kernel after collapsing any leading batch
    dimensions into a single one, since the image kernel only supports
    inputs of up to 4 dimensions.
    """
    # TODO: this is a temporary workaround until the image kernel supports arbitrary batch sizes. Remove this when
    # https://github.com/pytorch/vision/issues/6670 is resolved.
    if video.numel() == 0:
        # Nothing to process; hand the empty tensor back untouched.
        return video

    original_shape = video.shape
    needs_unsquash = video.ndim > 4
    if needs_unsquash:
        # Fold every leading dimension into a single batch dimension.
        video = video.view((-1,) + original_shape[-3:])

    output = adjust_sharpness_image_tensor(video, sharpness_factor=sharpness_factor)

    # Restore the original leading dimensions if we squashed them above.
    return output.view(original_shape) if needs_unsquash else output
6280

6381

6482
def adjust_sharpness(inpt: features.InputTypeJIT, sharpness_factor: float) -> features.InputTypeJIT:
@@ -160,7 +178,25 @@ def autocontrast(inpt: features.InputTypeJIT) -> features.InputTypeJIT:
160178

161179

162180
def equalize_video(video: torch.Tensor) -> torch.Tensor:
    """Equalize the histogram of a video tensor.

    Delegates to the image kernel after collapsing any leading batch
    dimensions into a single one, since the image kernel only supports
    inputs of up to 4 dimensions.
    """
    # TODO: this is a temporary workaround until the image kernel supports arbitrary batch sizes. Remove this when
    # https://github.com/pytorch/vision/issues/6670 is resolved.
    if video.numel() == 0:
        # Nothing to process; hand the empty tensor back untouched.
        return video

    original_shape = video.shape
    needs_unsquash = video.ndim > 4
    if needs_unsquash:
        # Fold every leading dimension into a single batch dimension.
        video = video.view((-1,) + original_shape[-3:])

    output = equalize_image_tensor(video)

    # Restore the original leading dimensions if we squashed them above.
    return output.view(original_shape) if needs_unsquash else output
164200

165201

166202
def equalize(inpt: features.InputTypeJIT) -> features.InputTypeJIT:

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,25 @@ def perspective_video(
10021002
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
10031003
fill: features.FillTypeJIT = None,
10041004
) -> torch.Tensor:
1005-
return perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
1005+
# TODO: this is a temporary workaround until the image kernel supports arbitrary batch sizes. Remove this when
1006+
# https://github.com/pytorch/vision/issues/6670 is resolved.
1007+
if video.numel() == 0:
1008+
return video
1009+
1010+
shape = video.shape
1011+
1012+
if video.ndim > 4:
1013+
video = video.view((-1,) + shape[-3:])
1014+
needs_unsquash = True
1015+
else:
1016+
needs_unsquash = False
1017+
1018+
output = perspective_image_tensor(video, perspective_coeffs, interpolation=interpolation, fill=fill)
1019+
1020+
if needs_unsquash:
1021+
output = output.view(shape)
1022+
1023+
return output
10061024

10071025

10081026
def perspective(
@@ -1105,7 +1123,25 @@ def elastic_video(
11051123
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
11061124
fill: features.FillTypeJIT = None,
11071125
) -> torch.Tensor:
1108-
return elastic_image_tensor(video, displacement, interpolation=interpolation, fill=fill)
1126+
# TODO: this is a temporary workaround until the image kernel supports arbitrary batch sizes. Remove this when
1127+
# https://github.com/pytorch/vision/issues/6670 is resolved.
1128+
if video.numel() == 0:
1129+
return video
1130+
1131+
shape = video.shape
1132+
1133+
if video.ndim > 4:
1134+
video = video.view((-1,) + shape[-3:])
1135+
needs_unsquash = True
1136+
else:
1137+
needs_unsquash = False
1138+
1139+
output = elastic_image_tensor(video, displacement, interpolation=interpolation, fill=fill)
1140+
1141+
if needs_unsquash:
1142+
output = output.view(shape)
1143+
1144+
return output
11091145

11101146

11111147
def elastic(

torchvision/prototype/transforms/functional/_misc.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,25 @@ def gaussian_blur_image_pil(
7171
def gaussian_blur_video(
    video: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None
) -> torch.Tensor:
    """Apply a Gaussian blur to a video tensor.

    Delegates to the image kernel after collapsing any leading batch
    dimensions into a single one, since the image kernel only supports
    inputs of up to 4 dimensions.
    """
    # TODO: this is a temporary workaround until the image kernel supports arbitrary batch sizes. Remove this when
    # https://github.com/pytorch/vision/issues/6670 is resolved.
    if video.numel() == 0:
        # Nothing to process; hand the empty tensor back untouched.
        return video

    original_shape = video.shape
    needs_unsquash = video.ndim > 4
    if needs_unsquash:
        # Fold every leading dimension into a single batch dimension.
        video = video.view((-1,) + original_shape[-3:])

    output = gaussian_blur_image_tensor(video, kernel_size, sigma)

    # Restore the original leading dimensions if we squashed them above.
    return output.view(original_shape) if needs_unsquash else output
7593

7694

7795
def gaussian_blur(

0 commit comments

Comments
 (0)