Skip to content

Commit 57b8653

Browse files
baijumeswani, kleiti
authored and committed
ResizeGrad CUDA/ROCM kernel implementation (microsoft#17772)
1 parent 42ea38d commit 57b8653

16 files changed

Lines changed: 605 additions & 25 deletions

onnxruntime/python/tools/symbolic_shape_infer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
230230
"upsample_nearest1d": self._infer_aten_upsample,
231231
"upsample_nearest2d": self._infer_aten_upsample,
232232
"upsample_nearest3d": self._infer_aten_upsample,
233-
"upsample_bilinear2d": self._infer_aten_upsample,
234233
}
235234
self.run_ = True
236235
self.suggested_merge_ = {}

orttraining/orttraining/core/graph/gradient_builder.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,5 +2147,13 @@ IMPLEMENT_GRADIENT_BUILDER(GetScaledSumGradient) {
21472147
ORT_THROW("ScaledSum gradient builder does not support ", input_count, " inputs");
21482148
}
21492149

2150+
// Gradient builder for Resize: emits a single ResizeGrad node (kMSDomain, version 1).
// The node consumes the output gradient GO(0) followed by the forward node's inputs
// I(0), I(1) and I(2), and produces GI(0), the gradient of forward input 0.
// The forward node's attributes are passed along via SrcNodeAttributes().
// NOTE(review): I(1)/I(2) presumably map to the ResizeGrad schema's `roi` and `scales`
// inputs; forward input 3 is not passed along - confirm this is intended.
IMPLEMENT_GRADIENT_BUILDER(GetResizeGradient) {
2151+
return std::vector<NodeDef>{
2152+
NodeDef(OpDef{"ResizeGrad", kMSDomain, 1},
2153+
{GO(0), I(0), I(1), I(2)},
2154+
{GI(0)},
2155+
SrcNodeAttributes())};
2156+
}
2157+
21502158
} // namespace training
21512159
} // namespace onnxruntime

orttraining/orttraining/core/graph/gradient_builder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ DECLARE_GRADIENT_BUILDER(GetGRUGradient)
9090
DECLARE_GRADIENT_BUILDER(GetReciprocalGradient)
9191
DECLARE_GRADIENT_BUILDER(GetLeakyReluGradient)
9292
DECLARE_GRADIENT_BUILDER(GetConvTransposeGradient)
93+
DECLARE_GRADIENT_BUILDER(GetResizeGradient)
9394

9495
DECLARE_GRADIENT_BUILDER(GetExternalGradient)
9596

orttraining/orttraining/core/graph/gradient_builder_registry.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ void GradientBuilderRegistry::RegisterGradientBuilders() {
122122
REGISTER_GRADIENT_BUILDER("Reciprocal", GetReciprocalGradient);
123123
REGISTER_GRADIENT_BUILDER("LeakyRelu", GetLeakyReluGradient);
124124
REGISTER_GRADIENT_BUILDER("ConvTranspose", GetConvTransposeGradient);
125+
REGISTER_GRADIENT_BUILDER("Resize", GetResizeGradient);
125126

126127
REGISTER_GRADIENT_BUILDER("ExternalGradient", GetExternalGradient);
127128
};

orttraining/orttraining/core/graph/training_op_defs.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5001,6 +5001,26 @@ Return true if all elements are true and false otherwise.
50015001
"T",
50025002
{"tensor(float16)", "tensor(float)", "tensor(double)"},
50035003
"Constrain input and output types to float tensors.");
5004+
5005+
// Schema for the ResizeGrad contrib operator (kMSDomain, since version 1).
// Inputs: dY (gradient of Y), X (input of the forward Resize), and the optional
// roi and scales inputs. dY, X, roi and the output dX share type T
// (float16/float/double); scales is always tensor(float).
// No attributes are declared here; AllowUncheckedAttributes() is set instead
// (presumably so the forward Resize attributes can be attached unchanged - confirm).
ONNX_CONTRIB_OPERATOR_SCHEMA(ResizeGrad)
5006+
.SetDomain(kMSDomain)
5007+
.SinceVersion(1)
5008+
.Input(0, "dY", "Gradient of output Y.", "T")
5009+
.Input(1, "X", "Input tensor to the Resize operator.", "T")
5010+
.Input(2, "roi", "The roi input to the Resize operator.", "T", OpSchema::Optional)
5011+
.Input(3, "scales", "The scales input to the Resize operator.", "tensor(float)", OpSchema::Optional)
5012+
.Output(0, "dX", "Gradient of the input X.", "T")
5013+
.AllowUncheckedAttributes()
5014+
.TypeConstraint(
5015+
"T",
5016+
{"tensor(float16)", "tensor(float)", "tensor(double)"},
5017+
"Constrain input and output types to float tensors.")
5018+
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
5019+
// dX takes its element type from input 1 (X), not from dY.
propagateElemTypeFromInputToOutput(ctx, 1, 0);
5020+
// The shape of dX is copied from X only when X's shape is available.
if (hasInputShape(ctx, 1)) {
5021+
propagateShapeFromInputToOutput(ctx, 1, 0);
5022+
}
5023+
});
50045024
}
50055025

50065026
} // namespace training

orttraining/orttraining/python/training/ortmodule/_custom_gradient_registry.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,3 @@ def upsample_nearest2d_gradient():
271271
@register_gradient("org.pytorch.aten", "ATen", "upsample_nearest3d", "vec")
272272
def upsample_nearest3d_gradient():
273273
return _upsample_gradient("upsample_nearest3d_backward", 3)
274-
275-
276-
@register_gradient("org.pytorch.aten", "ATen", "upsample_bilinear2d", "vec")
277-
def upsample_bilinear2d_gradient():
278-
return _upsample_gradient("upsample_bilinear2d_backward", 2)

orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -808,16 +808,3 @@ def upsample_nearest2d(g, input, output_size, scale_factors):
808808
@register_symbolic("upsample_nearest3d")
809809
def upsample_nearest3d(g, input, output_size, scale_factors):
810810
return _upsample_nearest(g, input, output_size, scale_factors, "upsample_nearest3d")
811-
812-
813-
@register_symbolic("upsample_bilinear2d")
814-
def upsample_bilinear2d(g, input, output_size, align_corners, scale_factors):
815-
return g.op(
816-
"org.pytorch.aten::ATen",
817-
input,
818-
output_size,
819-
align_corners,
820-
scale_factors,
821-
operator_s="upsample_bilinear2d",
822-
overload_name_s="vec",
823-
)

orttraining/orttraining/test/gradient/gradient_ops_test.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3298,6 +3298,41 @@ TEST(GradientCheckerTest, ConvTransposeGrad) {
32983298
execution_providers.push_back(DefaultCudaExecutionProvider());
32993299
ConvTransposeGradientCheckerTest(&execution_providers);
33003300
}
3301+
3302+
// TODO: Enable test for ROCM
3303+
// Runs the gradient checker on the ONNX Resize op (kOnnxDomain, version 18) using
// only the CUDA execution provider. The op is configured for linear mode with
// half_pixel coordinates; X is {1, 2, 4, 4} and Y is {1, 2, 8, 8}.
TEST(GradientCheckerTest, ResizeGrad) {
3304+
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
3305+
execution_providers.push_back(DefaultCudaExecutionProvider());
3306+
const std::vector<ONNX_NAMESPACE::AttributeProto> attributes = {
3307+
MakeAttribute("coordinate_transformation_mode", "half_pixel"),
3308+
MakeAttribute("cubic_coeff_a", -0.75f),
3309+
MakeAttribute("exclude_outside", static_cast<int64_t>(0)),
3310+
MakeAttribute("extrapolation_value", 0.0f),
3311+
MakeAttribute("mode", "linear"),
3312+
MakeAttribute("nearest_mode", "floor")};
3313+
3314+
float max_error;
3315+
GradientChecker<float, float, float> gradient_checker;
3316+
OpDef op_def{"Resize", kOnnxDomain, 18};
3317+
3318+
// Second TensorInfo argument is `true` for X and Y and `false` for roi/scales
// (presumably the "has gradient" flag - confirm against TensorInfo).
TensorInfo x_info({1, 2, 4, 4}, true);
3319+
TensorInfo roi_info({4}, false, nullptr, DataTypeImpl::GetTensorType<float>());
3320+
TensorInfo scales_info({4}, false, nullptr, DataTypeImpl::GetTensorType<float>());
3321+
3322+
TensorInfo y_info({1, 2, 8, 8}, true);
3323+
3324+
// One data vector per input, in the order {x, roi, scales} used in the call below:
// 32 values for X, 4 for roi, and {1, 1, 2, 2} for scales.
std::vector<std::vector<float>> x_datas = {{0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
3325+
0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
3326+
0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f,
3327+
0.2f, 0.4f, 0.6f, 0.8f, 0.2f, 0.4f, 0.6f, 0.8f},
3328+
{1.0f, 1.0f, 1.0f, 1.0f},
3329+
{1.0f, 1.0f, 2.0f, 2.0f}};
3330+
3331+
ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def, {x_info, roi_info, scales_info},
3332+
{y_info}, &max_error, x_datas, attributes, true, false, &execution_providers));
3333+
EXPECT_IS_TINY(max_error);
3334+
}
3335+
33013336
#endif // USE_CUDA
33023337

33033338
} // namespace test

orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,13 +1773,17 @@ def run_step(model, input):
17731773
_test_helpers.assert_values_are_close(ort_input.grad, pt_input.grad)
17741774

17751775

1776-
def test_aten_upsample_bilinear():
1776+
@pytest.mark.parametrize("interpolate_size_scale", ({"size": (8, 12)}, {"scale_factor": 4.7}))
1777+
@pytest.mark.parametrize("align_corners", (True, False))
1778+
def test_resize_grad_correctness_bilinear_2d(interpolate_size_scale, align_corners):
17771779
class _NeuralNetUpsampleBilinear(torch.nn.Module):
17781780
def __init__(self):
17791781
super().__init__()
17801782

17811783
def forward(self, input):
1782-
return torch.nn.functional.interpolate(input, size=(8, 12), mode="bilinear")
1784+
return torch.nn.functional.interpolate(
1785+
input, align_corners=align_corners, mode="bilinear", **interpolate_size_scale
1786+
)
17831787

17841788
device = "cuda"
17851789
pt_model = _NeuralNetUpsampleBilinear().to(device)
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
#include "test/providers/compare_provider_test_utils.h"
5+
#include "test/providers/provider_test_utils.h"
6+
#include "test/util/include/default_providers.h"
7+
8+
namespace onnxruntime::test {
9+
10+
#if defined(USE_CUDA) || defined(USE_ROCM)
11+
12+
namespace {
13+
14+
// Adds the attributes shared by the ResizeGrad tests in this file to `test`:
// "mode" is always "linear", and "coordinate_transformation_mode" is set to the
// value supplied by the caller.
void AddResizeGradAttributes(OpTester& test, const std::string& coordinate_transformation_mode) {
15+
test.AddAttribute<std::string>("mode", "linear");
16+
test.AddAttribute<std::string>("coordinate_transformation_mode", coordinate_transformation_mode);
17+
}
18+
19+
} // namespace
20+
21+
// ResizeGrad in float with half_pixel coordinates and only the dY and X inputs
// (no roi, no scales). dY is {1, 2, 8, 8} filled with ones, X is {1, 2, 4, 4};
// the expected dX is 4.0 for every element of X.
TEST(ResizeGradTest, ResizeGradWithSizes) {
22+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
23+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
// NOTE(review): `#elif USE_ROCM` evaluates the macro's value, whereas the
// guard around these tests uses `defined(USE_ROCM)` - consider aligning them.
#ifdef USE_CUDA
24+
providers.emplace_back(DefaultCudaExecutionProvider());
25+
#elif USE_ROCM
26+
providers.emplace_back(DefaultRocmExecutionProvider());
27+
#endif
28+
29+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
30+
31+
AddResizeGradAttributes(test, "half_pixel");
32+
33+
std::vector<float> dY(128, 1.0f);
34+
std::vector<int64_t> dY_shape = {1, 2, 8, 8};
35+
36+
std::vector<float> X(32, 1.0f);
37+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
38+
39+
std::vector<float> dX(32, 4.0f);
40+
std::vector<int64_t> dX_shape = X_shape;
41+
42+
test.AddInput<float>("dY", dY_shape, dY);
43+
test.AddInput<float>("X", X_shape, X);
44+
45+
test.AddOutput<float>("dX", dX_shape, dX);
46+
47+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
48+
}
49+
50+
// MLFloat16 variant of the sizes-only case: half_pixel coordinates, dY of shape
// {1, 2, 8, 8} filled with ones, X of shape {1, 2, 4, 4}, expected dX of 4.0
// everywhere. Values are written as float and converted to MLFloat16 before use.
TEST(ResizeGradTest, ResizeGradWithSizesHalf) {
51+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
52+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
#ifdef USE_CUDA
53+
providers.emplace_back(DefaultCudaExecutionProvider());
54+
#elif USE_ROCM
55+
providers.emplace_back(DefaultRocmExecutionProvider());
56+
#endif
57+
58+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
59+
60+
AddResizeGradAttributes(test, "half_pixel");
61+
62+
std::vector<float> dY(128, 1.0f);
63+
std::vector<MLFloat16> dY_half(dY.size());
64+
ConvertFloatToMLFloat16(dY.data(), dY_half.data(), static_cast<int>(dY.size()));
65+
std::vector<int64_t> dY_shape = {1, 2, 8, 8};
66+
67+
std::vector<float> X(32, 1.0f);
68+
std::vector<MLFloat16> X_half(X.size());
69+
ConvertFloatToMLFloat16(X.data(), X_half.data(), static_cast<int>(X.size()));
70+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
71+
72+
std::vector<float> dX(32, 4.0f);
73+
std::vector<MLFloat16> dX_half(dX.size());
74+
ConvertFloatToMLFloat16(dX.data(), dX_half.data(), static_cast<int>(dX.size()));
75+
std::vector<int64_t> dX_shape = X_shape;
76+
77+
test.AddInput<MLFloat16>("dY", dY_shape, dY_half);
78+
test.AddInput<MLFloat16>("X", X_shape, X_half);
79+
80+
test.AddOutput<MLFloat16>("dX", dX_shape, dX_half);
81+
82+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
83+
}
84+
85+
// ResizeGrad in float with align_corners coordinates and only the dY and X inputs
// (no roi, no scales). dY is {1, 2, 8, 8} filled with ones, X is {1, 2, 4, 4}.
// Unlike the half_pixel case the expected dX is not uniform; the same 4x4
// pattern is expected for both channels.
TEST(ResizeGradTest, ResizeGradWithSizesAndAlignCorners) {
86+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
87+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
#ifdef USE_CUDA
88+
providers.emplace_back(DefaultCudaExecutionProvider());
89+
#elif USE_ROCM
90+
providers.emplace_back(DefaultRocmExecutionProvider());
91+
#endif
92+
93+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
94+
95+
AddResizeGradAttributes(test, "align_corners");
96+
97+
std::vector<float> dY(128, 1.0f);
98+
std::vector<int64_t> dY_shape = {1, 2, 8, 8};
99+
100+
std::vector<float> X(32, 1.0f);
101+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
102+
103+
std::vector<float> dX({2.9388f, 3.9184f, 3.9184f, 2.9388f, 3.9184f, 5.2245f, 5.2245f, 3.9184f,
104+
3.9184f, 5.2245f, 5.2245f, 3.9184f, 2.9388f, 3.9184f, 3.9184f, 2.9388f,
105+
2.9388f, 3.9184f, 3.9184f, 2.9388f, 3.9184f, 5.2245f, 5.2245f, 3.9184f,
106+
3.9184f, 5.2245f, 5.2245f, 3.9184f, 2.9388f, 3.9184f, 3.9184f, 2.9388f});
107+
std::vector<int64_t> dX_shape = X_shape;
108+
109+
test.AddInput<float>("dY", dY_shape, dY);
110+
test.AddInput<float>("X", X_shape, X);
111+
112+
test.AddOutput<float>("dX", dX_shape, dX);
113+
114+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
115+
}
116+
117+
// ResizeGrad in float with half_pixel coordinates and an explicit scales input of
// {1, 1, 1.7, 1.7}. dY is {1, 2, 6, 6} filled with ones, X is {1, 2, 4, 4}.
// The same non-uniform 4x4 dX pattern is expected for both channels.
TEST(ResizeGradTest, ResizeGradWithScales) {
118+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
119+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
#ifdef USE_CUDA
120+
providers.emplace_back(DefaultCudaExecutionProvider());
121+
#elif USE_ROCM
122+
providers.emplace_back(DefaultRocmExecutionProvider());
123+
#endif
124+
125+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
126+
127+
AddResizeGradAttributes(test, "half_pixel");
128+
129+
std::vector<float> dY(72, 1.0f);
130+
std::vector<int64_t> dY_shape = {1, 2, 6, 6};
131+
132+
std::vector<float> X(32, 1.0f);
133+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
134+
135+
std::vector<float> dX({2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
136+
2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f,
137+
2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
138+
2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f});
139+
std::vector<int64_t> dX_shape = X_shape;
140+
141+
test.AddInput<float>("dY", dY_shape, dY);
142+
test.AddInput<float>("X", X_shape, X);
143+
// Third input slot gets an empty name and no data so that scales lands in the
// fourth slot (presumably this marks the optional roi as omitted - confirm).
test.AddInput<float>("", {0}, {});
144+
test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
145+
146+
test.AddOutput<float>("dX", dX_shape, dX);
147+
148+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
149+
}
150+
151+
// MLFloat16 variant of the explicit-scales case: half_pixel coordinates, scales of
// {1, 1, 1.7, 1.7}, dY of shape {1, 2, 6, 6} filled with ones, X of shape
// {1, 2, 4, 4}. Tensor values are written as float and converted to MLFloat16;
// the scales input itself stays float.
TEST(ResizeGradTest, ResizeGradWithScalesHalf) {
152+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
153+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
#ifdef USE_CUDA
154+
providers.emplace_back(DefaultCudaExecutionProvider());
155+
#elif USE_ROCM
156+
providers.emplace_back(DefaultRocmExecutionProvider());
157+
#endif
158+
159+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
160+
161+
AddResizeGradAttributes(test, "half_pixel");
162+
163+
std::vector<float> dY(72, 1.0f);
164+
std::vector<MLFloat16> dY_half(dY.size());
165+
ConvertFloatToMLFloat16(dY.data(), dY_half.data(), static_cast<int>(dY.size()));
166+
std::vector<int64_t> dY_shape = {1, 2, 6, 6};
167+
168+
std::vector<float> X(32, 1.0f);
169+
std::vector<MLFloat16> X_half(X.size());
170+
ConvertFloatToMLFloat16(X.data(), X_half.data(), static_cast<int>(X.size()));
171+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
172+
173+
std::vector<float> dX({2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
174+
2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f,
175+
2.7128f, 2.9550f, 2.7612f, 1.4533f, 2.9550f, 3.2189f, 3.0078f, 1.5830f,
176+
2.7612f, 3.0078f, 2.8106f, 1.4792f, 1.4533f, 1.5830f, 1.4792f, 0.7785f});
177+
std::vector<MLFloat16> dX_half(dX.size());
178+
ConvertFloatToMLFloat16(dX.data(), dX_half.data(), static_cast<int>(dX.size()));
179+
std::vector<int64_t> dX_shape = X_shape;
180+
181+
test.AddInput<MLFloat16>("dY", dY_shape, dY_half);
182+
test.AddInput<MLFloat16>("X", X_shape, X_half);
183+
// Third input slot gets an empty name and no data so that scales lands in the
// fourth slot (presumably this marks the optional roi as omitted - confirm).
// NOTE(review): the placeholder is typed float while dY/X are MLFloat16, and the
// ResizeGrad schema types roi as T - verify this mismatch is harmless.
test.AddInput<float>("", {0}, {});
184+
test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
185+
186+
test.AddOutput<MLFloat16>("dX", dX_shape, dX_half);
187+
188+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
189+
}
190+
191+
// ResizeGrad in float with align_corners coordinates and an explicit scales input
// of {1, 1, 1.7, 1.7}. dY is {1, 2, 6, 6} filled with ones, X is {1, 2, 4, 4}.
// The same non-uniform 4x4 dX pattern is expected for both channels.
TEST(ResizeGradTest, ResizeGradWithScalesAndAlignCorners) {
192+
std::vector<std::unique_ptr<IExecutionProvider>> providers;
193+
// Run on exactly one provider, CUDA when available, otherwise ROCm.
#ifdef USE_CUDA
194+
providers.emplace_back(DefaultCudaExecutionProvider());
195+
#elif USE_ROCM
196+
providers.emplace_back(DefaultRocmExecutionProvider());
197+
#endif
198+
199+
OpTester test("ResizeGrad", 1, onnxruntime::kMSDomain);
200+
201+
AddResizeGradAttributes(test, "align_corners");
202+
203+
std::vector<float> dY(72, 1.0f);
204+
std::vector<int64_t> dY_shape = {1, 2, 6, 6};
205+
206+
std::vector<float> X(32, 1.0f);
207+
std::vector<int64_t> X_shape = {1, 2, 4, 4};
208+
209+
std::vector<float> dX({1.9600f, 2.2400f, 2.2400f, 1.9600f, 2.2400f, 2.5600f, 2.5600f, 2.2400f,
210+
2.2400f, 2.5600f, 2.5600f, 2.2400f, 1.9600f, 2.2400f, 2.2400f, 1.9600f,
211+
1.9600f, 2.2400f, 2.2400f, 1.9600f, 2.2400f, 2.5600f, 2.5600f, 2.2400f,
212+
2.2400f, 2.5600f, 2.5600f, 2.2400f, 1.9600f, 2.2400f, 2.2400f, 1.9600f});
213+
std::vector<int64_t> dX_shape = X_shape;
214+
215+
test.AddInput<float>("dY", dY_shape, dY);
216+
test.AddInput<float>("X", X_shape, X);
217+
// Third input slot gets an empty name and no data so that scales lands in the
// fourth slot (presumably this marks the optional roi as omitted - confirm).
test.AddInput<float>("", {0}, {});
218+
test.AddInput<float>("scales", {4}, {1.0f, 1.0f, 1.7f, 1.7f});
219+
220+
test.AddOutput<float>("dX", dX_shape, dX);
221+
222+
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &providers);
223+
}
224+
225+
#endif // defined(USE_CUDA) || defined(USE_ROCM)
226+
227+
} // namespace onnxruntime::test

0 commit comments

Comments
 (0)