Skip to content

Commit 461bc8f

Browse files
SS-JIA authored and facebook-github-bot committed
Enable Dynamic shape support via tensor virtual and physical resizing (#2340)
Summary: X-link: pytorch/pytorch#121598

## Context

This changeset lays the foundation for supporting dynamic shapes in the ExecuTorch Vulkan delegate by allowing tensors to be resized in one of two ways:

1. Discarding the underlying `vkImage` or `vkBuffer` and allocating a new `vkImage` or `vkBuffer` with the updated sizes. This method is intended to be used when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes.
2. Updating the tensor's size metadata without reallocating any resources. This allows shaders to interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows command buffers to be preserved when sizes are changed.

Differential Revision: D54728401
1 parent ceb1f1d commit 461bc8f

10 files changed

+778
-239
lines changed
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Variants of the element-wise binary-op test shader. Each variant overrides
# OPERATOR, which the shader template splices into `#define OP(X, Y) ...`.
binary_op_nobroadcast__test:
  parameter_names_with_default_values:
    # Default operator, used by any variant that does not override it.
    OPERATOR: X + Y
  shader_variants:
    - NAME: binary_add_nobroadcast__test
      OPERATOR: X + Y
    - NAME: binary_sub_nobroadcast__test
      OPERATOR: X - Y
    - NAME: binary_mul_nobroadcast__test
      OPERATOR: X * Y
    - NAME: binary_div_nobroadcast__test
      OPERATOR: X / Y
    - NAME: binary_pow_nobroadcast__test
      OPERATOR: pow(X, Y)
15+
16+
# Image -> NCHW buffer copy test shader. One variant is listed; a DTYPE
# entry is declared for each of "half" and "float".
image_to_nchw__test:
  parameter_names_with_default_values:
    NDIM: 3
    DTYPE: float
    PACKING: CHANNELS_PACKED
  generate_variant_forall:
    DTYPE:
      - VALUE: "half"
        SUFFIX: "half"
      - VALUE: "float"
        SUFFIX: "float"
  shader_variants:
    - NAME: image3d_to_nchw__test_C_packed
29+
30+
# NCHW buffer -> image copy test shader. One variant is listed; a DTYPE
# entry is declared for each of "half" and "float".
nchw_to_image__test:
  parameter_names_with_default_values:
    NDIM: 3
    DTYPE: float
    PACKING: CHANNELS_PACKED
  generate_variant_forall:
    DTYPE:
      - VALUE: "half"
        SUFFIX: "half"
      - VALUE: "float"
        SUFFIX: "float"
  shader_variants:
    - NAME: nchw_to_image3d__test_C_packed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#version 450 core
// clang-format off
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}

#define OP(X, Y) ${OPERATOR}
// clang-format on

/*
 * Element-wise binary operator without broadcasting.
 *
 * For each texel position inside out_extents, one texel is fetched from
 * image_in and one from image_other at that same position, combined with
 * OP, and stored to image_out.
 */

layout(std430) buffer;

// clang-format off
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D image_out;
// clang-format on
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;

// Only the xyz components are read by this shader.
layout(set = 0, binding = 3) uniform PRECISION restrict OutExtents {
  uvec4 data;
}
out_extents;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Invocations outside the output extents have nothing to write.
  if (all(lessThan(texel_pos, out_extents.data.xyz))) {
    const vec4 lhs = texelFetch(image_in, texel_pos, 0);
    const vec4 rhs = texelFetch(image_other, texel_pos, 0);

    imageStore(image_out, texel_pos, OP(lhs, rhs));
  }
}

backends/vulkan/test/glsl/common.h

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#version 450 core
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}

/*
 * Fills every texel of uOutput that lies inside params.size with the
 * constant value params.vals.
 */

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

// clang-format off
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
// clang-format on
layout(set = 0, binding = 1) uniform PRECISION restrict Block {
  ivec3 size; // extents of the region to fill
  int fill;   // not read by this shader
  vec4 vals;  // value written to each texel
} params;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Write only when the invocation falls inside the requested extents.
  if (all(lessThan(texel_pos, params.size))) {
    imageStore(uOutput, texel_pos, params.vals);
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#version 450 core
// clang-format off
#define PRECISION ${PRECISION}
// clang-format on

#include "indexing_utils.h"

/*
 * Copies the contents of image_in into the linear buffer buffer_out.
 *
 * Each invocation handles one texel. The texel position is converted to a
 * tensor coordinate with POS_TO_COORD_${PACKING}; the four texel components
 * are then written to the buffer entries for coord.z, coord.z + 1,
 * coord.z + 2 and coord.z + 3, skipping any component whose z index is not
 * below cpu_sizes.data.z.
 */

layout(std430) buffer;

layout(set = 0, binding = 0) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
layout(set = 0, binding = 1) buffer PRECISION restrict writeonly Buffer {
  ${T[DTYPE]} data[];
}
buffer_out;

// Sizes used to map texel positions to coordinates and to bound the dispatch.
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
  ivec4 data;
}
gpu_sizes;

// Sizes used to compute linear indices into buffer_out.
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
  ivec4 data;
}
cpu_sizes;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);

  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
    return;
  }

  const ${VEC4_T[DTYPE]} intex = texelFetch(image_in, pos, 0);

  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
  // Distance in the buffer between consecutive z indices. It is taken from
  // cpu_sizes, the same sizes base_index is computed from, so component k
  // lands exactly on the buffer entry for (coord.x, coord.y, coord.z + k).
  const int z_stride = cpu_sizes.data.x * cpu_sizes.data.y;
  const ivec4 buf_indices = base_index + ivec4(0, 1, 2, 3) * z_stride;

  // Components whose z index is past cpu_sizes.data.z have no buffer entry
  // and are skipped.
  if (coord.z < cpu_sizes.data.z) {
    buffer_out.data[buf_indices.x] = intex.x;
  }
  if (coord.z + 1 < cpu_sizes.data.z) {
    buffer_out.data[buf_indices.y] = intex.y;
  }
  if (coord.z + 2 < cpu_sizes.data.z) {
    buffer_out.data[buf_indices.z] = intex.z;
  }
  if (coord.z + 3 < cpu_sizes.data.z) {
    buffer_out.data[buf_indices.w] = intex.w;
  }
}
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
/*
 * Converts a texel position `pos` into a 4-component coordinate.
 * x and y are passed through; pos.z * 4 is split into a remainder
 * (component z) and a quotient (component w) with respect to sizes.z.
 *
 * Arguments are parenthesized so that any expression can be passed.
 * Note that `pos` and `sizes` are evaluated more than once.
 */
#define POS_TO_COORD_CHANNELS_PACKED(pos, sizes) \
  ivec4((pos).x, (pos).y, ((pos).z * 4) % (sizes).z, ((pos).z * 4) / (sizes).z)
11+
12+
/*
 * Linear index of `coord` in a contiguous buffer whose extents are `sizes`,
 * with x varying fastest, then y, then z, then w.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so it can be used inside larger expressions; callers supply
 * their own statement terminator. `coord` and `sizes` are evaluated more
 * than once.
 */
#define COORD_TO_BUFFER_IDX(coord, sizes)  \
  ((coord).x + (coord).y * (sizes).x +     \
   (coord).z * (sizes).y * (sizes).x +     \
   (coord).w * (sizes).z * (sizes).y * (sizes).x)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#version 450 core
// clang-format off
#define PRECISION ${PRECISION}
// clang-format on

#include "indexing_utils.h"

/*
 * Copies the contents of the linear buffer buffer_in into image_out.
 *
 * Each invocation produces one texel. The texel position is converted to a
 * tensor coordinate with POS_TO_COORD_${PACKING}; the four texel components
 * are loaded from the buffer entries for coord.z, coord.z + 1, coord.z + 2
 * and coord.z + 3. Components whose z index is not below cpu_sizes.data.z
 * are set to zero.
 */

layout(std430) buffer;

// clang-format off
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
// clang-format on
layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer {
  ${T[DTYPE]} data[];
}
buffer_in;

// Sizes used to map texel positions to coordinates and to bound the dispatch.
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
  ivec4 data;
}
gpu_sizes;

// Sizes used to compute linear indices into buffer_in.
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
  ivec4 data;
}
cpu_sizes;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);

  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
    return;
  }

  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
  // Distance in the buffer between consecutive z indices. It is taken from
  // cpu_sizes, the same sizes base_index is computed from, so component k
  // is read exactly from the buffer entry for (coord.x, coord.y, coord.z + k).
  const int z_stride = cpu_sizes.data.x * cpu_sizes.data.y;
  const ivec4 buf_indices = base_index + ivec4(0, 1, 2, 3) * z_stride;

  // Components whose z index falls past cpu_sizes.data.z have no buffer entry.
  const ivec4 c_ind = ivec4(coord.z) + ivec4(0, 1, 2, 3);
  const bvec4 valid_c = lessThan(c_ind, ivec4(cpu_sizes.data.z));

  // Load only the valid components and leave the rest at zero. Reading first
  // and multiplying by a 0/1 mask afterwards would index the buffer at
  // entries that do not belong to this texel (possibly past the end of the
  // buffer), and a NaN or Inf read that way would survive the multiply.
  ${T[DTYPE]} val_x = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_y = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_z = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_w = ${T[DTYPE]}(0);

  if (valid_c.x) {
    val_x = buffer_in.data[buf_indices.x];
  }
  if (valid_c.y) {
    val_y = buffer_in.data[buf_indices.y];
  }
  if (valid_c.z) {
    val_z = buffer_in.data[buf_indices.z];
  }
  if (valid_c.w) {
    val_w = buffer_in.data[buf_indices.w];
  }

  const ${VEC4_T[DTYPE]} texel = ${VEC4_T[DTYPE]}(val_x, val_y, val_z, val_w);

  imageStore(image_out, ${GET_POS[NDIM]("pos")}, texel);
}

0 commit comments

Comments
 (0)