
Commit 3c352fd

Merge branch 'main' into update_nightly_2
2 parents 4afd911 + 87dd81a commit 3c352fd

9 files changed: +165 -66 lines changed

9 files changed

+165
-66
lines changed

backends/cadence/hifi/operators/op_add.cpp

Lines changed: 15 additions & 2 deletions

@@ -138,8 +138,21 @@ Tensor& add_out(
   if ((out_type != ScalarType::Float) || (alpha_val != 1.0))
     optimized = 0;

-  if ((a_dim == 0) || (b_dim == 0))
-    optimized = 0;
+  bool float_types =
+      (a_type == ScalarType::Float) && (b_type == ScalarType::Float);
+
+  if ((a_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[0] + b.const_data_ptr<float>()[i];
+    return out;
+  }
+  if ((b_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[i] + b.const_data_ptr<float>()[0];
+    return out;
+  }

   if ((broadcast == 1) && (max_dim > kNnlibMaxDim))
     optimized = 0;
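
For readers skimming the diff: the new branches short-circuit the case where one operand is a 0-dimensional (scalar) float tensor by broadcasting that scalar across the other operand and returning early, instead of clearing `optimized` and falling back. A minimal standalone sketch of the same pattern (plain C++ with raw buffers standing in for the ExecuTorch Tensor API; the function name and signature here are illustrative only):

#include <cstddef>

// Sketch: broadcast a scalar lhs across rhs, writing element-wise sums to out.
// In the kernel above the pointers come from const_data_ptr<float>() /
// mutable_data_ptr<float>() and the loop bound comes from the operator's code.
void add_scalar_broadcast(float lhs_scalar, const float* rhs, float* out, std::size_t n) {
  for (std::size_t i = 0; i < n; i++) {
    out[i] = lhs_scalar + rhs[i];
  }
}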

backends/cadence/hifi/operators/op_div.cpp

Lines changed: 30 additions & 4 deletions

@@ -86,8 +86,21 @@ div_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
   if ((a_type != ScalarType::Float) || (b_type != ScalarType::Float))
     optimized = 0;

-  if ((a_dim == 0) || (b_dim == 0))
-    optimized = 0;
+  bool float_types =
+      (a_type == ScalarType::Float) && (b_type == ScalarType::Float);
+
+  if ((a_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[0] / b.const_data_ptr<float>()[i];
+    return out;
+  }
+  if ((b_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[i] / b.const_data_ptr<float>()[0];
+    return out;
+  }

   if ((broadcast == 1) && (max_dim > kNnlibMaxDim))
     optimized = 0;
@@ -201,8 +214,21 @@ Tensor& div_out_mode(
   if ((a_type != ScalarType::Float) || (b_type != ScalarType::Float))
     optimized = 0;

-  if ((a_dim == 0) || (b_dim == 0))
-    optimized = 0;
+  bool float_types =
+      (a_type == ScalarType::Float) && (b_type == ScalarType::Float);
+
+  if ((a_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[0] / b.const_data_ptr<float>()[i];
+    return out;
+  }
+  if ((b_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[i] / b.const_data_ptr<float>()[0];
+    return out;
+  }

   if ((broadcast == 1) && (max_dim > kNnlibMaxDim))
     optimized = 0;

backends/cadence/hifi/operators/op_mul.cpp

Lines changed: 16 additions & 3 deletions

@@ -104,10 +104,23 @@ mul_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
   int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
   max_dim = out.dim() > max_dim ? out.dim() : max_dim;

-  if ((a_type != ScalarType::Float) || (b_type != ScalarType::Float))
-    optimized = 0;
+  bool float_types =
+      (a_type == ScalarType::Float) && (b_type == ScalarType::Float);
+
+  if ((a_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[0] * b.const_data_ptr<float>()[i];
+    return out;
+  }
+  if ((b_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[i] * b.const_data_ptr<float>()[0];
+    return out;
+  }

-  if ((a_dim == 0) || (b_dim == 0))
+  if ((a_type != ScalarType::Float) || (b_type != ScalarType::Float))
     optimized = 0;

   if ((broadcast == 1) && (max_dim > kNnlibMaxDim))

backends/cadence/hifi/operators/op_sub.cpp

Lines changed: 15 additions & 2 deletions

@@ -133,8 +133,21 @@ Tensor& sub_out(
   if ((out_type != ScalarType::Float) || (alpha_val != 1.0))
     optimized = 0;

-  if ((a_dim == 0) || (b_dim == 0))
-    optimized = 0;
+  bool float_types =
+      (a_type == ScalarType::Float) && (b_type == ScalarType::Float);
+
+  if ((a_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[0] - b.const_data_ptr<float>()[i];
+    return out;
+  }
+  if ((b_dim == 0) && float_types) {
+    for (int i = 0; i < max_dim; i++)
+      out.mutable_data_ptr<float>()[i] =
+          a.const_data_ptr<float>()[i] - b.const_data_ptr<float>()[0];
+    return out;
+  }

   if ((broadcast == 1) && (max_dim > kNnlibMaxDim))
     optimized = 0;
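
Note that op_add.cpp, op_sub.cpp, op_mul.cpp, and op_div.cpp all gain the same two scalar-broadcast branches, differing only in the arithmetic operator. A hypothetical helper (not part of this commit) could factor the shared loop out; sketch under that assumption:

// Hypothetical helper, not in this commit: apply `op` between a scalar and a
// contiguous float buffer. Each operator would pass its own lambda.
template <typename Op>
void scalar_broadcast(const float* scalar, const float* vec, float* out, int n, Op op) {
  for (int i = 0; i < n; i++) {
    out[i] = op(scalar[0], vec[i]);
  }
}

// e.g. the a_dim == 0 branch of sub_out would then read:
//   scalar_broadcast(a.const_data_ptr<float>(), b.const_data_ptr<float>(),
//                    out.mutable_data_ptr<float>(), max_dim,
//                    [](float x, float y) { return x - y; });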

backends/vulkan/op_registry.py

Lines changed: 2 additions & 2 deletions

@@ -528,8 +528,6 @@ def register_view_op(features: OpFeatures):
         exir_ops.edge.aten.index_select.default,
         exir_ops.edge.aten.select_copy.int,
         # Tensor combination
-        exir_ops.edge.aten.split_with_sizes_copy.default,
-        exir_ops.edge.aten.split.Tensor,
         exir_ops.edge.aten.repeat.default,
         # Tensor creation
         exir_ops.edge.aten.arange.start_step,
@@ -563,6 +561,8 @@ def register_ported_op(features: OpFeatures):
         exir_ops.edge.aten.permute_copy.default,
         # Tensor combination
         exir_ops.edge.aten.cat.default,
+        exir_ops.edge.aten.split_with_sizes_copy.default,
+        exir_ops.edge.aten.split.Tensor,
     ]
 )
 def register_ported_op_all_packed_dims(features: OpFeatures):

backends/vulkan/runtime/graph/ops/impl/Split.cpp

Lines changed: 43 additions & 47 deletions

@@ -25,8 +25,6 @@ void add_split_with_sizes_default_node(
     ValueRef out_list_ref) {
   vTensorPtr t_in = graph.get_tensor(in);

-  VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim));
-
   ValueListPtr out_list = graph.get_value_list(out_list_ref);

   DimIndex dim_index = normalize_to_dim_index(*t_in, dim);
@@ -38,62 +36,60 @@
     ValueRef out_ref = (*out_list)[split_idx];

     vTensorPtr t_out = graph.get_tensor(out_ref);
-    VK_CHECK_COND(check_packed_dim_is(*t_out, WHCN::kChannelsDim));
     VK_CHECK_COND(dim_at(*t_out, dim_index) == split_size);
   }

-  if (dim_index == kWidth4D) {
-    utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
-    utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
+  const auto packed_dim = t_in->packed_dim();
+  const auto packed_dim_index = static_cast<DimIndex>(kWidth4D - packed_dim);

-    for (ValueRef out_ref : *out_list) {
-      // Doesn't need to use split_size since we have already verified that the
-      // output tensor's size matches with the split_size.
-      vTensorPtr t_out = graph.get_tensor(out_ref);
-      utils::ivec3 range = t_out->logical_limits();
-      add_copy_offset_node(
-          graph, in, range, src_offset, dst_offset, out_ref, false, true);
+  // Index of dimension to be concatenated in (w, h, c * b) coordinate system
+  const auto dim_xyz_index = std::min(2, -dim_index - 1);

-      src_offset[0] += range[0];
-    }
-  } else if (dim_index == kHeight4D) {
-    utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
-    utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
+  utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
+  utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);

-    for (ValueRef out_ref : *out_list) {
-      vTensorPtr t_out = graph.get_tensor(out_ref);
-      utils::ivec3 range = t_out->logical_limits();
-      add_copy_offset_node(
-          graph, in, range, src_offset, dst_offset, out_ref, false, true);
+  const bool is_splitting_channel = (dim_index == kChannel4D);

-      src_offset[1] += range[1];
-    }
-  } else if (dim_index == kBatch4D) {
-    utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false);
-    utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false);
+  // if splitting channels
+  if (is_splitting_channel) {
+    // set source offset w as channel size of the input tensor
+    src_offset[3] = dim_at(t_in->sizes(), kChannel4D);
+  }

-    for (ValueRef out_ref : *out_list) {
-      vTensorPtr t_out = graph.get_tensor(out_ref);
-      utils::ivec3 range = t_out->logical_limits();
+  for (ValueRef out_ref : *out_list) {
+    // Doesn't need to use split_size since we have already verified that the
+    // output tensor's size matches with the split_size.
+    vTensorPtr t_out = graph.get_tensor(out_ref);
+    const auto out_channel_size = dim_at(t_out->sizes(), kChannel4D);
+    utils::ivec3 range = t_out->logical_limits();
+
+    if (dim_index == packed_dim_index) {
+      // if splitting channels, use add_copy_channel_offset_node function as
+      // add_copy_packed_dim_offset_node does not support channel packing
+      if (is_splitting_channel) {
+        add_copy_channel_offset_node(
+            graph, in, out_channel_size, src_offset[2], dst_offset[2], out_ref);
+        src_offset[dim_xyz_index] += out_channel_size;
+      } else {
+        // dst_offset[3] is not used now but will be used in the future when
+        // add_copy_packed_dim_offset_node will support channel packing
+        //
+        // set destination offset w as channel size of the output tensor if
+        // splitting channel
+        dst_offset[3] = is_splitting_channel ? out_channel_size : 0;
+        add_copy_packed_dim_offset_node(
+            graph, in, range, src_offset, dst_offset, out_ref);
+        src_offset[dim_xyz_index] += dim_at(t_out->sizes(), packed_dim_index);
+      }
+    } else {
+      // set destination offset w as channel size of the output tensor if
+      // splitting channels
+      dst_offset[3] = is_splitting_channel ? out_channel_size : 0;
       add_copy_offset_node(
           graph, in, range, src_offset, dst_offset, out_ref, false, true);
-
-      src_offset[2] += range[2];
-    }
-  } else if (dim_index == kChannel4D) {
-    int32_t src_offset = 0;
-    int32_t dst_offset = 0;
-
-    for (ValueRef out_ref : *out_list) {
-      vTensorPtr t_out = graph.get_tensor(out_ref);
-      int32_t range = dim_at<kChannel4D>(t_out->sizes());
-      add_copy_channel_offset_node(
-          graph, in, range, src_offset, dst_offset, out_ref);
-      src_offset += range;
+      src_offset[dim_xyz_index] +=
+          is_splitting_channel ? out_channel_size : range[dim_xyz_index];
     }
-
-  } else {
-    VK_THROW("not ipmlemented");
   }
 }

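The key generalization above is that the separate width/height/batch/channel branches collapse into one loop driven by dim_xyz_index = std::min(2, -dim_index - 1). A small sketch of that mapping, assuming the Vulkan backend's negative-indexed DimIndex convention (width = -1, height = -2, channel = -3, batch = -4; the exact enum values are an assumption here, not stated in the diff):

#include <algorithm>
#include <cassert>

// Sketch: map a negative DimIndex onto the x/y/z component of the copy offset.
// Channel and batch both clamp to z because they share the texture's z extent
// (the "(w, h, c * b)" coordinate system mentioned in the comment above).
int dim_to_xyz_index(int dim_index) {
  return std::min(2, -dim_index - 1);
}

int main() {
  assert(dim_to_xyz_index(-1) == 0); // width   -> x
  assert(dim_to_xyz_index(-2) == 1); // height  -> y
  assert(dim_to_xyz_index(-3) == 2); // channel -> z
  assert(dim_to_xyz_index(-4) == 2); // batch   -> z (clamped)
  return 0;
}
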
backends/vulkan/test/op_tests/cases.py

Lines changed: 13 additions & 0 deletions

@@ -922,30 +922,41 @@ def get_split_with_sizes_inputs():
     Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"])
     test_cases = [
         # Split on Width
+        Test(self=(S1, 7, 10, 11), sizes=[1, 3, 2, 5], dim=3),
         Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3),
+        Test(self=(7, 10, 11), sizes=[1, 3, 2, 5], dim=2),
         Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
+        Test(self=(7, 10, 11), sizes=[3, 8], dim=2),
         Test(self=(7, 10, 10), sizes=[1, 9], dim=2),
         Test(self=(10, 10), sizes=[1, 9], dim=1),
         Test(self=(10,), sizes=[1, 9], dim=0),
         # Split on Height
+        Test(self=(S1, 7, 11, 10), sizes=[1, 3, 2, 5], dim=2),
         Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
+        Test(self=(7, 11, 10), sizes=[1, 3, 2, 5], dim=1),
         Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1),
+        Test(self=(7, 11, 11), sizes=[3, 8], dim=1),
         Test(self=(7, 10, 10), sizes=[10], dim=1),
         Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1),
         Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0),
         # Split on Batch
         Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0),
         Test(self=(10, 7, 10, 10), sizes=[10], dim=0),
         # Split on Channel
+        Test(self=(7, 13, 4, 8), sizes=[3, 5, 2, 3], dim=1),
         Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1),
+        Test(self=(7, 13, 4, 8), sizes=[3, 2, 2, 5, 1], dim=1),
         Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1),
+        Test(self=(13, 4, 8), sizes=[3, 5, 2, 1, 2], dim=0),
         Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0),
         Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0),
         Test(self=(13, 4, 8), sizes=[13], dim=0),
     ]
     test_suite = VkTestSuite([tuple(tc) for tc in test_cases])

     test_suite.layouts = [
+        "utils::kWidthPacked",
+        "utils::kHeightPacked",
         "utils::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
@@ -997,6 +1008,8 @@ def get_split_tensor_inputs():
     )

     test_suite.layouts = [
+        "utils::kWidthPacked",
+        "utils::kHeightPacked",
         "utils::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"

exir/emit/test/test_emit.py

Lines changed: 27 additions & 4 deletions

@@ -1534,26 +1534,49 @@ def forward(self, x):
         self.assertEqual(len(program.constant_buffer[1].storage), 8)

     def test_emit_lifted_tensor_constant(self) -> None:
-        class LiftedConstants(nn.Module):
+        class LiftedTensorConstants(nn.Module):
             def __init__(self):
                 super().__init__()

             def forward(self, x):
                 x = x * torch.tensor([[4, 3], [1, 2], [5, 6]], dtype=torch.float)
                 return x

-        model = LiftedConstants()
+        model = LiftedTensorConstants()
+        # Specify that we want to move non-lifted constants to external file
+        et_cfg = ExecutorchBackendConfig(external_constants=True)
+        program = to_edge(
+            export(model, (torch.ones(3, 2),), strict=True)
+        ).to_executorch(et_cfg)
+        program = program._emitter_output.program
+        exec_plan = program.execution_plan[0]
+        # There should only be 1 input to this model.
+        self.assertEqual(len(exec_plan.inputs), 1)
+        self.assertEqual(len(program.constant_buffer), 2)
+        self.assertEqual(len(program.constant_buffer[1].storage), 24)

+    def test_emit_lifted_constant(self) -> None:
+        class LiftedConstants(nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x):
+                x = x + 1
+                return x
+
+        model = LiftedConstants()
+        # Specify that we want to move non-lifted constants to external file
+        et_cfg = ExecutorchBackendConfig(external_constants=True)
         program = to_edge(
             export(model, (torch.ones(3, 2),), strict=True)
-        ).to_executorch()
+        ).to_executorch(et_cfg)

         program = program._emitter_output.program
         exec_plan = program.execution_plan[0]
         # There should only be 1 input to this model.
         self.assertEqual(len(exec_plan.inputs), 1)
         self.assertEqual(len(program.constant_buffer), 2)
-        self.assertEqual(len(program.constant_buffer[1].storage), 24)
+        self.assertEqual(len(program.constant_buffer[1].storage), 8)

     def test_mutable_buffers(self) -> None:
         def count_copies(gm: torch.fx.GraphModule) -> int:

exir/passes/external_constants_pass.py

Lines changed: 4 additions & 2 deletions

@@ -17,15 +17,17 @@ def external_constants_pass(
     gm: GraphModule,
 ) -> PassResult:
     """
-    Move all constants to external file.
+    Move all non-lifted constants to external file.
+    NOTE: Lifted constants are not moved as they are closer
+    to code than data.
     """
     mutated = False
     for module in gm.modules():
         if not isinstance(module, torch.fx.GraphModule):
             continue

         for node in module.graph.nodes:
-            if node.op == "placeholder":
+            if (node.op == "placeholder") and ("_lifted_tensor" not in node.name):
                 spec = node.meta.get("spec")
                 if isinstance(spec, TensorSpec) and spec.const:
                     node.meta["constant_tag"] = "_default_external_constant"
