Skip to content

add Video feature and kernels #6667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4874907
add video feature
pmeier Sep 29, 2022
a1b00b4
add video kernels
pmeier Sep 29, 2022
e7a229c
add video testing utils
pmeier Sep 29, 2022
5d8b8b6
add one kernel info
pmeier Sep 29, 2022
2380f10
Merge branch 'main' into video
pmeier Oct 4, 2022
a04d667
fix kernel names in Video feature
pmeier Oct 4, 2022
35642b9
use only uint8 for video testing
pmeier Oct 4, 2022
ae59458
require at least 4 dims for Video feature
pmeier Oct 4, 2022
0fb1c35
add TODO for image_size -> spatial_size
pmeier Oct 4, 2022
2d1e560
image -> video in feature constructor
pmeier Oct 4, 2022
91e15b2
introduce new combined images and video type
pmeier Oct 4, 2022
81237fe
add video to transform utils
pmeier Oct 5, 2022
aa26292
fix transforms test
pmeier Oct 5, 2022
93d7556
fix auto augment
pmeier Oct 5, 2022
6df2f0f
Merge branch 'main' into video
pmeier Oct 5, 2022
a99765d
cleanup
pmeier Oct 5, 2022
17ee7f7
Merge branch 'main' into video
pmeier Oct 6, 2022
4506cdf
address review comments
pmeier Oct 6, 2022
36f52dc
add remaining video kernel infos
pmeier Oct 6, 2022
0d2ad96
add batch dimension squashing to some kernels
pmeier Oct 6, 2022
f1e2bfa
fix tests and kernel infos
pmeier Oct 6, 2022
93fc321
add xfails for arbitrary batch sizes on some kernels
pmeier Oct 6, 2022
f843612
Merge branch 'main' into video
pmeier Oct 6, 2022
ad4d424
Merge branch 'main' into video
pmeier Oct 7, 2022
d8945e6
fix test setup
pmeier Oct 7, 2022
1c86193
fix equalize_image_tensor for multi batch dims
pmeier Oct 7, 2022
1c2b615
fix adjust_sharpness_image_tensor for multi batch dims
pmeier Oct 7, 2022
7f3a8b7
Merge branch 'video' of https://github.com/pmeier/vision into video
pmeier Oct 7, 2022
2d7b07d
address review comments
pmeier Oct 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 69 additions & 12 deletions test/prototype_common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
"make_segmentation_masks",
"make_mask_loaders",
"make_masks",
"make_video",
"make_videos",
]


Expand Down Expand Up @@ -210,17 +212,19 @@ def _parse_image_size(size, *, name="size"):

def from_loader(loader_fn):
    """Turn a loader factory into a function that returns the loaded object.

    The returned wrapper accepts the same arguments as ``loader_fn`` plus an optional ``device`` keyword
    (default ``"cpu"``). ``device`` is removed from the keyword arguments before ``loader_fn`` is called and
    is passed to the ``load`` method of the loader that ``loader_fn`` returns.
    """

    def wrapper(*args, **kwargs):
        # ``device`` is consumed by ``load()``, not by the loader factory, so strip it before forwarding.
        device = kwargs.pop("device", "cpu")
        loader = loader_fn(*args, **kwargs)
        return loader.load(device)

    return wrapper


def from_loaders(loaders_fn):
    """Turn a factory of multiple loaders into a generator function of loaded objects.

    The returned wrapper accepts the same arguments as ``loaders_fn`` plus an optional ``device`` keyword
    (default ``"cpu"``). ``device`` is removed from the keyword arguments before ``loaders_fn`` is called.
    Every loader produced by ``loaders_fn`` is loaded exactly once on that device and yielded in order.
    """

    def wrapper(*args, **kwargs):
        # ``device`` is consumed by ``load()``, not by the loaders factory, so strip it before forwarding.
        device = kwargs.pop("device", "cpu")
        for loader in loaders_fn(*args, **kwargs):
            yield loader.load(device)

    return wrapper

Expand All @@ -246,6 +250,21 @@ def __post_init__(self):
self.num_channels = self.shape[-3]


# Maps a color space to its number of channels; looked up by `get_num_channels`.
NUM_CHANNELS_MAP = {
    features.ColorSpace.GRAY: 1,
    features.ColorSpace.GRAY_ALPHA: 2,
    features.ColorSpace.RGB: 3,
    features.ColorSpace.RGB_ALPHA: 4,
}


def get_num_channels(color_space):
    """Return the channel count registered for ``color_space`` in ``NUM_CHANNELS_MAP``.

    Raises:
        pytest.UsageError: If the lookup yields no (or a falsy) entry for ``color_space``.
    """
    channels = NUM_CHANNELS_MAP.get(color_space)
    if channels:
        return channels

    raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}")


def make_image_loader(
size="random",
*,
Expand All @@ -255,16 +274,7 @@ def make_image_loader(
constant_alpha=True,
):
size = _parse_image_size(size)

try:
num_channels = {
features.ColorSpace.GRAY: 1,
features.ColorSpace.GRAY_ALPHA: 2,
features.ColorSpace.RGB: 3,
features.ColorSpace.RGB_ALPHA: 4,
}[color_space]
except KeyError as error:
raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}") from error
num_channels = get_num_channels(color_space)

def fn(shape, dtype, device):
max_value = get_max_value(dtype)
Expand Down Expand Up @@ -531,3 +541,50 @@ def make_mask_loaders(


# Generator variant of `make_mask_loaders`: each loader is loaded on `device` (default "cpu") and yielded.
make_masks = from_loaders(make_mask_loaders)


class VideoLoader(ImageLoader):
    """Loader type for videos; it adds nothing on top of ``ImageLoader``."""


def make_video_loader(
    size="random",
    *,
    color_space=features.ColorSpace.RGB,
    num_frames="random",
    extra_dims=(),
    dtype=torch.uint8,
):
    """Create a ``VideoLoader`` describing a video of the requested layout.

    Args:
        size: Spatial size, normalized through ``_parse_image_size``.
        color_space: Color space of the video; it also determines the channel count of the shape.
        num_frames: Number of frames, or ``"random"`` to draw an integer from ``[1, 5)``.
        extra_dims: Dimensions placed in front of the frame dimension.
        dtype: Data type stored on the loader.

    Returns:
        A ``VideoLoader`` with shape ``(*extra_dims, num_frames, num_channels, *size)``.
    """
    size = _parse_image_size(size)
    if num_frames == "random":
        num_frames = int(torch.randint(1, 5, ()))

    def fn(shape, dtype, device):
        # Everything in front of the channel dimension (extra dims and frames) is handed to `make_image`
        # as extra dims; the channel count itself follows from `color_space`.
        leading_dims = shape[:-3]
        spatial_size = shape[-2:]
        data = make_image(
            size=spatial_size,
            color_space=color_space,
            extra_dims=leading_dims,
            dtype=dtype,
            device=device,
        )
        return features.Video(data, color_space=color_space)

    video_shape = (*extra_dims, num_frames, get_num_channels(color_space), *size)
    return VideoLoader(fn, shape=video_shape, dtype=dtype, color_space=color_space)


# Eager variant of `make_video_loader`: builds the loader and returns the result of loading it on `device`
# (default "cpu").
make_video = from_loader(make_video_loader)


def make_video_loaders(
    *,
    sizes=DEFAULT_IMAGE_SIZES,
    color_spaces=(
        features.ColorSpace.GRAY,
        features.ColorSpace.RGB,
    ),
    num_frames=(1, 0, "random"),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.uint8,),
):
    """Yield one video loader for every parameter combination produced by ``combinations_grid``.

    Each argument is a collection of candidate values for the equally named (singular) parameter of
    ``make_video_loader``.
    """
    grid = combinations_grid(
        size=sizes,
        color_space=color_spaces,
        num_frames=num_frames,
        extra_dims=extra_dims,
        dtype=dtypes,
    )
    for loader_kwargs in grid:
        yield make_video_loader(**loader_kwargs)


# Generator variant of `make_video_loaders`: each loader is loaded on `device` (default "cpu") and yielded.
make_videos = from_loaders(make_video_loaders)
20 changes: 20 additions & 0 deletions test/prototype_transforms_dispatcher_infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ def fill_sequence_needs_broadcast(args_kwargs):
)


def xfail_all_tests(*, reason, condition):
    """Build xfail ``TestMark`` objects for the three ``TestDispatchers`` tests named below.

    Args:
        reason: Text passed to ``pytest.mark.xfail``.
        condition: Passed through unchanged as the ``condition`` of every ``TestMark``.

    Returns:
        A list with one ``TestMark`` per test name.
    """
    dispatcher_test_names = (
        "test_scripted_smoke",
        "test_dispatch_simple_tensor",
        "test_dispatch_feature",
    )
    return [
        TestMark(
            ("TestDispatchers", name),
            pytest.mark.xfail(reason=reason),
            condition=condition,
        )
        for name in dispatcher_test_names
    ]


# xfail marks for the dispatcher tests covered by `xfail_all_tests`. They apply when the first positional
# argument has more than four dimensions, or when any dimension in front of its last three is zero.
xfails_degenerate_or_multi_batch_dims = xfail_all_tests(
    reason="See https://github.com/pytorch/vision/issues/6670 for details.",
    condition=lambda args_kwargs: len(args_kwargs.args[0].shape) > 4 or not all(args_kwargs.args[0].shape[:-3]),
)


DISPATCHER_INFOS = [
DispatcherInfo(
F.horizontal_flip,
Expand Down Expand Up @@ -243,6 +260,7 @@ def fill_sequence_needs_broadcast(args_kwargs):
pil_kernel_info=PILKernelInfo(F.perspective_image_pil),
test_marks=[
xfail_dispatch_pil_if_fill_sequence_needs_broadcast,
*xfails_degenerate_or_multi_batch_dims,
],
),
DispatcherInfo(
Expand All @@ -253,6 +271,7 @@ def fill_sequence_needs_broadcast(args_kwargs):
features.Mask: F.elastic_mask,
},
pil_kernel_info=PILKernelInfo(F.elastic_image_pil),
test_marks=xfails_degenerate_or_multi_batch_dims,
),
DispatcherInfo(
F.center_crop,
Expand All @@ -275,6 +294,7 @@ def fill_sequence_needs_broadcast(args_kwargs):
test_marks=[
xfail_jit_python_scalar_arg("kernel_size"),
xfail_jit_python_scalar_arg("sigma"),
*xfails_degenerate_or_multi_batch_dims,
],
),
DispatcherInfo(
Expand Down
Loading