Skip to content

Commit a6a726e

Browse files
SS-JIA authored and
facebook-github-bot committed
Enable Dynamic shape support via tensor virtual and physical resizing (#2340)
Summary: X-link: pytorch/pytorch#121598 ## Context This changeset lays the foundations for supporting dynamic shapes in the ExecuTorch Vulkan delegate by allowing Tensors to be resized in one of two ways: 1. Discarding the underlying `vkImage` or `vkBuffer` and reallocating a new `vkImage` or `vkBuffer` with updated sizes. This method is intended to be used when the current `vkImage` or `vkBuffer` is not large enough to contain the new sizes. 2. Updating the tensor's size metadata without reallocating any new resources. This allows shaders to interpret the underlying `vkImage` or `vkBuffer` as if it were smaller than it actually is, and allows command buffers to be preserved when sizes are changed. bypass-github-export-checks Reviewed By: jorgep31415 Differential Revision: D54728401
1 parent 70c5be3 commit a6a726e

10 files changed

+803
-239
lines changed
+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
binary_op_nobroadcast__test:
2+
parameter_names_with_default_values:
3+
OPERATOR: X + Y
4+
shader_variants:
5+
- NAME: binary_add_nobroadcast__test
6+
OPERATOR: X + Y
7+
- NAME: binary_sub_nobroadcast__test
8+
OPERATOR: X - Y
9+
- NAME: binary_mul_nobroadcast__test
10+
OPERATOR: X * Y
11+
- NAME: binary_div_nobroadcast__test
12+
OPERATOR: X / Y
13+
- NAME: binary_pow_nobroadcast__test
14+
OPERATOR: pow(X, Y)
15+
16+
image_to_nchw__test:
17+
parameter_names_with_default_values:
18+
NDIM: 3
19+
DTYPE: float
20+
PACKING: CHANNELS_PACKED
21+
generate_variant_forall:
22+
DTYPE:
23+
- VALUE: "half"
24+
SUFFIX: "half"
25+
- VALUE: "float"
26+
SUFFIX: "float"
27+
shader_variants:
28+
- NAME: image3d_to_nchw__test_C_packed
29+
30+
nchw_to_image__test:
31+
parameter_names_with_default_values:
32+
NDIM: 3
33+
DTYPE: float
34+
PACKING: CHANNELS_PACKED
35+
generate_variant_forall:
36+
DTYPE:
37+
- VALUE: "half"
38+
SUFFIX: "half"
39+
- VALUE: "float"
40+
SUFFIX: "float"
41+
shader_variants:
42+
- NAME: nchw_to_image3d__test_C_packed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
// clang-format off
11+
#define PRECISION ${PRECISION}
12+
#define FORMAT ${FORMAT}
13+
14+
#define OP(X, Y) ${OPERATOR}
15+
// clang-format on
16+
17+
layout(std430) buffer;
18+
19+
// clang-format off
20+
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D image_out;
21+
// clang-format on
22+
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
23+
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;
24+
25+
layout(set = 0, binding = 3) uniform PRECISION restrict OutExtents {
26+
uvec4 data;
27+
}
28+
out_extents;
29+
30+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
31+
32+
/*
 * Element-wise binary operator without broadcasting.
 *
 * Each invocation handles the texel at its global invocation ID: it fetches
 * that texel from image_in and image_other, combines the two with OP (the
 * operator substituted by the shader codegen), and stores the result at the
 * same position in image_out.
 */
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Only positions strictly inside the extents held in out_extents are
  // processed; every other invocation does nothing.
  if (all(lessThan(texel_pos, out_extents.data.xyz))) {
    const vec4 lhs = texelFetch(image_in, texel_pos, 0);
    const vec4 rhs = texelFetch(image_other, texel_pos, 0);

    imageStore(image_out, texel_pos, OP(lhs, rhs));
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
#define PRECISION ${PRECISION}
11+
#define FORMAT ${FORMAT}
12+
13+
layout(std430) buffer;
14+
15+
/* Qualifiers: layout - storage - precision - memory */
16+
17+
// clang-format off
18+
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
19+
// clang-format on
20+
layout(set = 0, binding = 1) uniform PRECISION restrict Block {
21+
ivec3 size;
22+
int fill;
23+
vec4 vals;
24+
} params;
25+
26+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
27+
28+
/*
 * Fills the output image with a constant texel.
 *
 * Each invocation writes params.vals to uOutput at its global invocation ID,
 * provided that position lies inside params.size.
 */
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);

  // Invocations at or beyond params.size in any dimension write nothing.
  if (all(lessThan(texel_pos, params.size))) {
    imageStore(uOutput, texel_pos, params.vals);
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
// clang-format off
11+
#define PRECISION ${PRECISION}
12+
// clang-format on
13+
14+
#include "indexing_utils.h"
15+
16+
layout(std430) buffer;
17+
18+
layout(set = 0, binding = 0) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} image_in;
19+
layout(set = 0, binding = 1) buffer PRECISION restrict writeonly Buffer {
20+
${T[DTYPE]} data[];
21+
}
22+
buffer_out;
23+
24+
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
25+
ivec4 data;
26+
}
27+
gpu_sizes;
28+
29+
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
30+
ivec4 data;
31+
}
32+
cpu_sizes;
33+
34+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
35+
36+
/*
 * Copies one texel of image_in into buffer_out.
 *
 * The texel position is converted to a 4-component coordinate with
 * POS_TO_COORD_${PACKING} using gpu_sizes. The buffer index of the texel's
 * first component is computed from that coordinate with cpu_sizes, and the
 * remaining three components are written at successive offsets of
 * gpu_sizes.data.x * gpu_sizes.data.y elements.
 */
void main() {
  const ivec3 texel_pos = ivec3(gl_GlobalInvocationID);
  const ivec4 tensor_coord = POS_TO_COORD_${PACKING}(texel_pos, gpu_sizes.data);

  // Nothing to do for coordinates outside gpu_sizes.
  if (any(greaterThanEqual(tensor_coord, gpu_sizes.data))) {
    return;
  }

  const ${VEC4_T[DTYPE]} texel = texelFetch(image_in, texel_pos, 0);

  // Distance in buffer elements between consecutive components of a texel.
  const int plane_stride = gpu_sizes.data.x * gpu_sizes.data.y;
  const int first_index = COORD_TO_BUFFER_IDX(tensor_coord, cpu_sizes.data);
  const ivec4 out_indices = first_index + ivec4(0, 1, 2, 3) * plane_stride;

  // A component is written only when its z coordinate is below
  // cpu_sizes.data.z; components past that bound are dropped.
  for (int lane = 0; lane < 4; ++lane) {
    if (tensor_coord.z + lane < cpu_sizes.data.z) {
      buffer_out.data[out_indices[lane]] = texel[lane];
    }
  }
}
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#define POS_TO_COORD_CHANNELS_PACKED(pos, sizes) \
10+
ivec4(pos.x, pos.y, (pos.z * 4) % sizes.z, (pos.z * 4) / sizes.z)
11+
12+
#define COORD_TO_BUFFER_IDX(coord, sizes) \
13+
coord.x + coord.y* sizes.x + coord.z* sizes.y* sizes.x + \
14+
coord.w* sizes.z* sizes.y* sizes.x;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
// clang-format off
11+
#define PRECISION ${PRECISION}
12+
// clang-format on
13+
14+
#include "indexing_utils.h"
15+
16+
layout(std430) buffer;
17+
18+
// clang-format off
19+
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
20+
// clang-format on
21+
layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer {
22+
${T[DTYPE]} data[];
23+
}
24+
buffer_in;
25+
26+
layout(set = 0, binding = 2) uniform PRECISION restrict GpuSizes {
27+
ivec4 data;
28+
}
29+
gpu_sizes;
30+
31+
layout(set = 0, binding = 3) uniform PRECISION restrict CpuSizes {
32+
ivec4 data;
33+
}
34+
cpu_sizes;
35+
36+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
37+
38+
/*
 * Copies four elements of buffer_in into one texel of image_out.
 *
 * The texel position is converted to a 4-component coordinate with
 * POS_TO_COORD_${PACKING} using gpu_sizes. The buffer index of the texel's
 * first component is computed from that coordinate with cpu_sizes, and the
 * remaining three components are read at successive offsets of
 * gpu_sizes.data.x * gpu_sizes.data.y elements.
 *
 * Components whose z coordinate is at or beyond cpu_sizes.data.z are stored
 * as zero.
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  const ivec4 coord = POS_TO_COORD_${PACKING}(pos, gpu_sizes.data);

  // Nothing to do for coordinates outside gpu_sizes.
  if (any(greaterThanEqual(coord, gpu_sizes.data))) {
    return;
  }

  const int base_index = COORD_TO_BUFFER_IDX(coord, cpu_sizes.data);
  const ivec4 buf_indices =
      base_index + ivec4(0, 1, 2, 3) * (gpu_sizes.data.x * gpu_sizes.data.y);

  // Start from zero and read a component only when its z coordinate is below
  // cpu_sizes.data.z. Reading first and masking afterwards by multiplying
  // with 0 would index buffer_in past the data for the last texel and would
  // let NaN/Inf garbage survive the multiplication.
  ${T[DTYPE]} val_x = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_y = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_z = ${T[DTYPE]}(0);
  ${T[DTYPE]} val_w = ${T[DTYPE]}(0);

  if (coord.z < cpu_sizes.data.z) {
    val_x = buffer_in.data[buf_indices.x];
  }
  if (coord.z + 1 < cpu_sizes.data.z) {
    val_y = buffer_in.data[buf_indices.y];
  }
  if (coord.z + 2 < cpu_sizes.data.z) {
    val_z = buffer_in.data[buf_indices.z];
  }
  if (coord.z + 3 < cpu_sizes.data.z) {
    val_w = buffer_in.data[buf_indices.w];
  }

  const ${VEC4_T[DTYPE]} texel = ${VEC4_T[DTYPE]}(val_x, val_y, val_z, val_w);

  imageStore(image_out, ${GET_POS[NDIM]("pos")}, texel);
}

backends/vulkan/test/glsl/test_shader.glsl

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
19
#version 450 core
210
#define PRECISION ${PRECISION}
311
#define FORMAT ${FORMAT}

0 commit comments

Comments
 (0)