add Video feature and kernels #6667
Merged
Changes from 4 commits
Commits (29)
4874907  add video feature (pmeier)
a1b00b4  add video kernels (pmeier)
e7a229c  add video testing utils (pmeier)
5d8b8b6  add one kernel info (pmeier)
2380f10  Merge branch 'main' into video (pmeier)
a04d667  fix kernel names in Video feature (pmeier)
35642b9  use only uint8 for video testing (pmeier)
ae59458  require at least 4 dims for Video feature (pmeier)
0fb1c35  add TODO for image_size -> spatial_size (pmeier)
2d1e560  image -> video in feature constructor (pmeier)
91e15b2  introduce new combined images and video type (pmeier)
81237fe  add video to transform utils (pmeier)
aa26292  fix transforms test (pmeier)
93d7556  fix auto augment (pmeier)
6df2f0f  Merge branch 'main' into video (pmeier)
a99765d  cleanup (pmeier)
17ee7f7  Merge branch 'main' into video (pmeier)
4506cdf  address review comments (pmeier)
36f52dc  add remaining video kernel infos (pmeier)
0d2ad96  add batch dimension squashing to some kernels (pmeier)
f1e2bfa  fix tests and kernel infos (pmeier)
93fc321  add xfails for arbitrary batch sizes on some kernels (pmeier)
f843612  Merge branch 'main' into video (pmeier)
ad4d424  Merge branch 'main' into video (pmeier)
d8945e6  fix test setup (pmeier)
1c86193  fix equalize_image_tensor for multi batch dims (pmeier)
1c2b615  fix adjust_sharpness_image_tensor for multi batch dims (pmeier)
7f3a8b7  Merge branch 'video' of https://github.com/pmeier/vision into video (pmeier)
2d7b07d  address review comments (pmeier)
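Several of the later commits above ("add batch dimension squashing to some kernels", "fix equalize_image_tensor for multi batch dims") deal with making kernels work for videos that carry extra leading batch dimensions. The sketch below is an assumption based only on those commit messages, not the PR's actual code: flatten every leading dimension into a single batch dimension, call a kernel that only understands (N, C, H, W), then restore the original layout.

```python
import torch


def flip_image_batch(images: torch.Tensor) -> torch.Tensor:
    # stand-in for an image kernel that only accepts a single batch dimension
    if images.ndim != 4:
        raise ValueError("expected a (N, C, H, W) tensor")
    return images.flip(-1)


def flip_video(video: torch.Tensor) -> torch.Tensor:
    # squash all leading dimensions, e.g. (B, T) or just (T,), into one batch dim ...
    leading_shape = video.shape[:-3]
    batched = video.reshape(-1, *video.shape[-3:])
    output = flip_image_batch(batched)
    # ... and restore the original (..., C, H, W) layout afterwards
    return output.reshape(*leading_shape, *output.shape[-3:])


clip = torch.rand(2, 8, 3, 16, 16)  # (B, T, C, H, W)
assert flip_video(clip).shape == clip.shape
```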
@@ -13,3 +13,4 @@
 )
 from ._label import Label, OneHotLabel
 from ._mask import Mask
+from ._video import Video
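Assuming this hunk belongs to the prototype features package's __init__.py (the relative imports ._label, ._mask, ._video suggest torchvision.prototype.features), the new type would then be importable alongside the existing feature types:

```python
# assumption: the hunk above is torchvision/prototype/features/__init__.py
from torchvision.prototype.features import Label, Mask, OneHotLabel, Video
```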
@@ -0,0 +1,232 @@
from __future__ import annotations

import warnings
from typing import Any, cast, List, Optional, Tuple, Union

import torch
from torchvision.transforms.functional import InterpolationMode

from ._feature import _Feature, FillTypeJIT
from ._image import ColorSpace


class Video(_Feature):
    color_space: ColorSpace

    def __new__(
        cls,
        data: Any,
        *,
        color_space: Optional[Union[ColorSpace, str]] = None,
        dtype: Optional[torch.dtype] = None,
        device: Optional[Union[torch.device, str, int]] = None,
        requires_grad: bool = False,
    ) -> Video:
        data = torch.as_tensor(data, dtype=dtype, device=device)  # type: ignore[arg-type]
        if data.ndim < 3:
            raise ValueError
        elif data.ndim == 3:
            data = data.unsqueeze(0)

        image = super().__new__(cls, data, requires_grad=requires_grad)

        if color_space is None:
            color_space = ColorSpace.from_tensor_shape(image.shape)  # type: ignore[arg-type]
            if color_space == ColorSpace.OTHER:
                warnings.warn("Unable to guess a specific color space. Consider passing it explicitly.")
        elif isinstance(color_space, str):
            color_space = ColorSpace.from_str(color_space.upper())
        elif not isinstance(color_space, ColorSpace):
            raise ValueError
        image.color_space = color_space

        return image

    def __repr__(self, *, tensor_contents: Any = None) -> str:  # type: ignore[override]
        return self._make_repr(color_space=self.color_space)

    @classmethod
    def new_like(
        cls, other: Video, data: Any, *, color_space: Optional[Union[ColorSpace, str]] = None, **kwargs: Any
    ) -> Video:
        return super().new_like(
            other, data, color_space=color_space if color_space is not None else other.color_space, **kwargs
        )

    @property
    def image_size(self) -> Tuple[int, int]:
        return cast(Tuple[int, int], tuple(self.shape[-2:]))

    @property
    def num_channels(self) -> int:
        return self.shape[-3]

    @property
    def num_frames(self) -> int:
        return self.shape[-4]

    def to_color_space(self, color_space: Union[str, ColorSpace], copy: bool = True) -> Video:
        if isinstance(color_space, str):
            color_space = ColorSpace.from_str(color_space.upper())

        return Video.new_like(
            self,
            self._F.convert_color_space_video(
                self, old_color_space=self.color_space, new_color_space=color_space, copy=copy
            ),
            color_space=color_space,
        )

    def horizontal_flip(self) -> Video:
        output = self._F.horizontal_flip_Video_tensor(self)
        return Video.new_like(self, output)

    def vertical_flip(self) -> Video:
        output = self._F.vertical_flip_Video_tensor(self)
        return Video.new_like(self, output)

    def resize(  # type: ignore[override]
        self,
        size: List[int],
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        max_size: Optional[int] = None,
        antialias: bool = False,
    ) -> Video:
        output = self._F.resize_Video_tensor(
            self, size, interpolation=interpolation, max_size=max_size, antialias=antialias
        )
        return Video.new_like(self, output)

    def crop(self, top: int, left: int, height: int, width: int) -> Video:
        output = self._F.crop_Video_tensor(self, top, left, height, width)
        return Video.new_like(self, output)

    def center_crop(self, output_size: List[int]) -> Video:
        output = self._F.center_crop_Video_tensor(self, output_size=output_size)
        return Video.new_like(self, output)

    def resized_crop(
        self,
        top: int,
        left: int,
        height: int,
        width: int,
        size: List[int],
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        antialias: bool = False,
    ) -> Video:
        output = self._F.resized_crop_Video_tensor(
            self, top, left, height, width, size=list(size), interpolation=interpolation, antialias=antialias
        )
        return Video.new_like(self, output)

    def pad(
        self,
        padding: Union[int, List[int]],
        fill: FillTypeJIT = None,
        padding_mode: str = "constant",
    ) -> Video:
        output = self._F.pad_Video_tensor(self, padding, fill=fill, padding_mode=padding_mode)
        return Video.new_like(self, output)

    def rotate(
        self,
        angle: float,
        interpolation: InterpolationMode = InterpolationMode.NEAREST,
        expand: bool = False,
        fill: FillTypeJIT = None,
        center: Optional[List[float]] = None,
    ) -> Video:
        output = self._F._geometry.rotate_Video_tensor(
            self, angle, interpolation=interpolation, expand=expand, fill=fill, center=center
        )
        return Video.new_like(self, output)

    def affine(
        self,
        angle: Union[int, float],
        translate: List[float],
        scale: float,
        shear: List[float],
        interpolation: InterpolationMode = InterpolationMode.NEAREST,
        fill: FillTypeJIT = None,
        center: Optional[List[float]] = None,
    ) -> Video:
        output = self._F._geometry.affine_Video_tensor(
            self,
            angle,
            translate=translate,
            scale=scale,
            shear=shear,
            interpolation=interpolation,
            fill=fill,
            center=center,
        )
        return Video.new_like(self, output)

    def perspective(
        self,
        perspective_coeffs: List[float],
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        fill: FillTypeJIT = None,
    ) -> Video:
        output = self._F._geometry.perspective_Video_tensor(
            self, perspective_coeffs, interpolation=interpolation, fill=fill
        )
        return Video.new_like(self, output)

    def elastic(
        self,
        displacement: torch.Tensor,
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        fill: FillTypeJIT = None,
    ) -> Video:
        output = self._F._geometry.elastic_Video_tensor(self, displacement, interpolation=interpolation, fill=fill)
        return Video.new_like(self, output)

    def adjust_brightness(self, brightness_factor: float) -> Video:
        output = self._F.adjust_brightness_Video_tensor(self, brightness_factor=brightness_factor)
        return Video.new_like(self, output)

    def adjust_saturation(self, saturation_factor: float) -> Video:
        output = self._F.adjust_saturation_Video_tensor(self, saturation_factor=saturation_factor)
        return Video.new_like(self, output)

    def adjust_contrast(self, contrast_factor: float) -> Video:
        output = self._F.adjust_contrast_Video_tensor(self, contrast_factor=contrast_factor)
        return Video.new_like(self, output)

    def adjust_sharpness(self, sharpness_factor: float) -> Video:
        output = self._F.adjust_sharpness_Video_tensor(self, sharpness_factor=sharpness_factor)
        return Video.new_like(self, output)

    def adjust_hue(self, hue_factor: float) -> Video:
        output = self._F.adjust_hue_Video_tensor(self, hue_factor=hue_factor)
        return Video.new_like(self, output)

    def adjust_gamma(self, gamma: float, gain: float = 1) -> Video:
        output = self._F.adjust_gamma_Video_tensor(self, gamma=gamma, gain=gain)
        return Video.new_like(self, output)

    def posterize(self, bits: int) -> Video:
        output = self._F.posterize_Video_tensor(self, bits=bits)
        return Video.new_like(self, output)

    def solarize(self, threshold: float) -> Video:
        output = self._F.solarize_Video_tensor(self, threshold=threshold)
        return Video.new_like(self, output)

    def autocontrast(self) -> Video:
        output = self._F.autocontrast_Video_tensor(self)
        return Video.new_like(self, output)

    def equalize(self) -> Video:
        output = self._F.equalize_Video_tensor(self)
        return Video.new_like(self, output)

    def invert(self) -> Video:
        output = self._F.invert_Video_tensor(self)
        return Video.new_like(self, output)

    def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Video:
        output = self._F.gaussian_blur_Video_tensor(self, kernel_size=kernel_size, sigma=sigma)
        return Video.new_like(self, output)
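As a usage sketch of the class above (prototype API at this revision, so kernel and method names may have changed in later commits): wrap a uint8 clip tensor and dispatch through the methods defined on Video. A 4-dimensional (T, C, H, W) input satisfies the constructor's ndim check directly; a 3-dimensional input would be unsqueezed to a single frame.

```python
import torch
from torchvision.prototype import features  # assumption: prototype namespace at the time of this PR

# an 8-frame RGB clip in (T, C, H, W) layout
clip = torch.randint(0, 256, (8, 3, 32, 32), dtype=torch.uint8)
video = features.Video(clip, color_space="RGB")

print(video.num_frames, video.num_channels, video.image_size)  # 8 3 (32, 32)

# the methods defined above dispatch to the corresponding video kernels
flipped = video.horizontal_flip()
small = video.resize([16, 16])
```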