Skip to content

Commit acb8e12

Browse files
committed
all: new blocked layouts for 3D brgemm matmul B matrix
1 parent 0abbf22 commit acb8e12

15 files changed

+216
-67
lines changed

include/oneapi/dnnl/dnnl.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2016-2022 Intel Corporation
2+
* Copyright 2016-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -1900,6 +1900,18 @@ struct memory : public handle<dnnl_memory_t> {
19001900
BA16a48b4a = dnnl_BA16a48b4a,
19011901
BA16a64b4a = dnnl_BA16a64b4a,
19021902
decbA16a = dnnl_decbA16a,
1903+
aCB16b16c = dnnl_aCB16b16c,
1904+
aCB16b32c = dnnl_aCB16b32c,
1905+
aCB16b48c = dnnl_aCB16b48c,
1906+
aCB16b64c = dnnl_aCB16b64c,
1907+
aCB16b16c2b = dnnl_aCB16b16c2b,
1908+
aCB16b32c2b = dnnl_aCB16b32c2b,
1909+
aCB16b48c2b = dnnl_aCB16b48c2b,
1910+
aCB16b64c2b = dnnl_aCB16b64c2b,
1911+
aCB16b16c4b = dnnl_aCB16b16c4b,
1912+
aCB16b32c4b = dnnl_aCB16b32c4b,
1913+
aCB16b48c4b = dnnl_aCB16b48c4b,
1914+
aCB16b64c4b = dnnl_aCB16b64c4b,
19031915

19041916
format_tag_last = dnnl_format_tag_last,
19051917

include/oneapi/dnnl/dnnl_types.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2016-2022 Intel Corporation
2+
* Copyright 2016-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -723,6 +723,18 @@ typedef enum {
723723
dnnl_aBdfec16b,
724724
dnnl_abdEC64e2c,
725725
dnnl_abdEC64e4c,
726+
dnnl_aCB16b16c,
727+
dnnl_aCB16b32c,
728+
dnnl_aCB16b48c,
729+
dnnl_aCB16b64c,
730+
dnnl_aCB16b16c2b,
731+
dnnl_aCB16b32c2b,
732+
dnnl_aCB16b48c2b,
733+
dnnl_aCB16b64c2b,
734+
dnnl_aCB16b16c4b,
735+
dnnl_aCB16b32c4b,
736+
dnnl_aCB16b48c4b,
737+
dnnl_aCB16b64c4b,
726738

727739
/// Just a sentinel, not a real memory format tag. Must be changed after a new
728740
/// format tag is added.

src/common/c_types_map.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2016-2022 Intel Corporation
2+
* Copyright 2016-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -288,6 +288,18 @@ const format_tag_t BA16a16b4a = dnnl_BA16a16b4a;
288288
const format_tag_t BA16a32b4a = dnnl_BA16a32b4a;
289289
const format_tag_t BA16a48b4a = dnnl_BA16a48b4a;
290290
const format_tag_t BA16a64b4a = dnnl_BA16a64b4a;
291+
// Internal aliases for the aCB16b* blocked format tags (plain, 2b- and
// 4b-interleaved variants), each forwarding to the public dnnl_* enumerator
// of the same name.
const format_tag_t aCB16b16c = dnnl_aCB16b16c;
292+
const format_tag_t aCB16b32c = dnnl_aCB16b32c;
293+
const format_tag_t aCB16b48c = dnnl_aCB16b48c;
294+
const format_tag_t aCB16b64c = dnnl_aCB16b64c;
295+
const format_tag_t aCB16b16c2b = dnnl_aCB16b16c2b;
296+
const format_tag_t aCB16b32c2b = dnnl_aCB16b32c2b;
297+
const format_tag_t aCB16b48c2b = dnnl_aCB16b48c2b;
298+
const format_tag_t aCB16b64c2b = dnnl_aCB16b64c2b;
299+
const format_tag_t aCB16b16c4b = dnnl_aCB16b16c4b;
300+
const format_tag_t aCB16b32c4b = dnnl_aCB16b32c4b;
301+
const format_tag_t aCB16b48c4b = dnnl_aCB16b48c4b;
302+
const format_tag_t aCB16b64c4b = dnnl_aCB16b64c4b;
291303

292304
const format_tag_t Abc16a = dnnl_Abc16a;
293305
const format_tag_t ABc16a16b = dnnl_ABc16a16b;

src/common/dnnl_debug_autogenerated.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2018-2022 Intel Corporation
2+
* Copyright 2018-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -579,6 +579,18 @@ const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
579579
if (v == dnnl_aBdfec16b) return "aBdfec16b";
580580
if (v == dnnl_abdEC64e2c) return "abdEC64e2c";
581581
if (v == dnnl_abdEC64e4c) return "abdEC64e4c";
582+
if (v == dnnl_aCB16b16c) return "aCB16b16c";
583+
if (v == dnnl_aCB16b32c) return "aCB16b32c";
584+
if (v == dnnl_aCB16b48c) return "aCB16b48c";
585+
if (v == dnnl_aCB16b64c) return "aCB16b64c";
586+
if (v == dnnl_aCB16b16c2b) return "aCB16b16c2b";
587+
if (v == dnnl_aCB16b32c2b) return "aCB16b32c2b";
588+
if (v == dnnl_aCB16b48c2b) return "aCB16b48c2b";
589+
if (v == dnnl_aCB16b64c2b) return "aCB16b64c2b";
590+
if (v == dnnl_aCB16b16c4b) return "aCB16b16c4b";
591+
if (v == dnnl_aCB16b32c4b) return "aCB16b32c4b";
592+
if (v == dnnl_aCB16b48c4b) return "aCB16b48c4b";
593+
if (v == dnnl_aCB16b64c4b) return "aCB16b64c4b";
582594
if (v == dnnl_format_tag_last) return "format_tag_last";
583595
if (v == dnnl_x) return "x";
584596
if (v == dnnl_nc) return "nc";

src/common/memory_desc_wrapper.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2016-2022 Intel Corporation
2+
* Copyright 2016-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -195,6 +195,18 @@ status_t memory_desc_wrapper::compute_blocking(
195195
C(BA16a32b4a, {1, 0}, {16, 32, 4}, {0, 1, 0});
196196
C(BA16a48b4a, {1, 0}, {16, 48, 4}, {0, 1, 0});
197197
C(BA16a64b4a, {1, 0}, {16, 64, 4}, {0, 1, 0});
198+
C(aCB16b16c, {0, 2, 1}, {16, 16}, {1, 2});
199+
C(aCB16b32c, {0, 2, 1}, {16, 32}, {1, 2});
200+
C(aCB16b48c, {0, 2, 1}, {16, 48}, {1, 2});
201+
C(aCB16b64c, {0, 2, 1}, {16, 64}, {1, 2});
202+
C(aCB16b16c2b, {0, 2, 1}, {16, 16, 2}, {1, 2, 1});
203+
C(aCB16b32c2b, {0, 2, 1}, {16, 32, 2}, {1, 2, 1});
204+
C(aCB16b48c2b, {0, 2, 1}, {16, 48, 2}, {1, 2, 1});
205+
C(aCB16b64c2b, {0, 2, 1}, {16, 64, 2}, {1, 2, 1});
206+
C(aCB16b16c4b, {0, 2, 1}, {16, 16, 4}, {1, 2, 1});
207+
C(aCB16b32c4b, {0, 2, 1}, {16, 32, 4}, {1, 2, 1});
208+
C(aCB16b48c4b, {0, 2, 1}, {16, 48, 4}, {1, 2, 1});
209+
C(aCB16b64c4b, {0, 2, 1}, {16, 64, 4}, {1, 2, 1});
198210

199211
C(ABc4b16a4b, {0, 1, 2}, {4, 16, 4}, {1, 0, 1});
200212
C(ABc4b32a4b, {0, 1, 2}, {4, 32, 4}, {1, 0, 1});

src/common/memory_desc_wrapper.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2016-2022 Intel Corporation
2+
* Copyright 2016-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -127,7 +127,7 @@ struct memory_desc_wrapper : public c_compatible {
127127
using namespace memory_extra_flags;
128128

129129
auto calculate_size = [=](int cmask, size_t buff_data_size) {
130-
assert(utils::one_of(cmask, 1, 2, 3, 13, 27));
130+
assert(utils::one_of(cmask, 1, 2, 3, 5, 13, 27));
131131
dim_t prod = 1;
132132
for (int d = 0; d < ndims(); ++d)
133133
if (cmask & (1 << d)) { prod *= padded_dims()[d]; }

src/common/tag_traits.hpp

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2018-2022 Intel Corporation
2+
* Copyright 2018-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -127,6 +127,16 @@ enum class inner_blk_t {
127127
_16c32b4c,
128128
_16c48b4c,
129129
_16c64b4c,
130+
_16b32c,
131+
_16b48c,
132+
_16b64c,
133+
_16b32c2b,
134+
_16b48c2b,
135+
_16b64c2b,
136+
_16b16c4b,
137+
_16b32c4b,
138+
_16b48c4b,
139+
_16b64c4b,
130140
};
131141

132142
/** returns the offset within the block for weights blocked over oc and ic */
@@ -150,7 +160,9 @@ constexpr int AB_or_BC_blk_off(int x0, int x1) {
150160
ib::_4a8b8a4b, ib::_4b8c8b4c, ib::_16b32a2b, ib::_16b48a2b,
151161
ib::_16b64a2b, ib::_16b32a4b, ib::_16b48a4b, ib::_16b64a4b,
152162
ib::_16c32b2c, ib::_16c48b2c, ib::_16c64b2c, ib::_16c32b4c,
153-
ib::_16c48b4c, ib::_16c64b4c),
163+
ib::_16c48b4c, ib::_16c64b4c, ib::_16b32c, ib::_16b48c,
164+
ib::_16b64c, ib::_16b32c2b, ib::_16b48c2b, ib::_16b64c2b,
165+
ib::_16b16c4b, ib::_16b32c4b, ib::_16b48c4b, ib::_16b64c4b),
154166
"unexpected inner_blk format");
155167

156168
// clang-format off
@@ -165,17 +177,17 @@ constexpr int AB_or_BC_blk_off(int x0, int x1) {
165177
: (f == ib::_16b16a || f == ib::_16c16b) ? 16 * x1 + x0
166178
: (f == ib::_16a2b || f == ib::_16b2c) ? 2 * x0 + x1
167179
: (f == ib::_16a4b || f == ib::_16b4c) ? 4 * x0 + x1
168-
: (f == ib::_32a32b || f == ib::_16a32b) ? 32 * x0 + x1
180+
: (utils::one_of(f, ib::_32a32b, ib::_16a32b, ib::_16b32c)) ? 32 * x0 + x1
169181
: (utils::one_of(f, ib::_8a16b2a, ib::_8b16c2b, ib::_16a16b2a, ib::_16b16c2b)) ? (x0 / 2) * 32 + x1 * 2 + x0 % 2
170-
: (f == ib::_16a48b) ? x0 * 48 + x1
171-
: (f == ib::_16a64b) ? x0 * 64 + x1
172-
: (f == ib::_16a32b2a) ? (x0 / 2) * 64 + x1 * 2 + x0 % 2
173-
: (f == ib::_16a48b2a) ? (x0 / 2) * 96 + x1 * 2 + x0 % 2
174-
: (f == ib::_16a64b2a) ? (x0 / 2) * 128 + x1 * 2 + x0 % 2
175-
: (f == ib::_16a16b4a) ? (x0 / 4) * 64 + x1 * 4 + x0 % 4
176-
: (f == ib::_16a32b4a) ? (x0 / 4) * 128 + x1 * 4 + x0 % 4
177-
: (f == ib::_16a48b4a) ? (x0 / 4) * 192 + x1 * 4 + x0 % 4
178-
: (f == ib::_16a64b4a) ? (x0 / 4) * 256 + x1 * 4 + x0 % 4
182+
: (utils::one_of(f, ib::_16a48b, ib::_16b48c)) ? x0 * 48 + x1
183+
: (utils::one_of(f, ib::_16a64b, ib::_16b64c)) ? x0 * 64 + x1
184+
: (utils::one_of(f, ib::_16a32b2a, ib::_16b32c2b)) ? (x0 / 2) * 64 + x1 * 2 + x0 % 2
185+
: (utils::one_of(f, ib::_16a48b2a, ib::_16b48c2b)) ? (x0 / 2) * 96 + x1 * 2 + x0 % 2
186+
: (utils::one_of(f, ib::_16a64b2a, ib::_16b64c2b)) ? (x0 / 2) * 128 + x1 * 2 + x0 % 2
187+
: (utils::one_of(f, ib::_16a16b4a, ib::_16b16c4b)) ? (x0 / 4) * 64 + x1 * 4 + x0 % 4
188+
: (utils::one_of(f, ib::_16a32b4a, ib::_16b32c4b)) ? (x0 / 4) * 128 + x1 * 4 + x0 % 4
189+
: (utils::one_of(f, ib::_16a48b4a, ib::_16b48c4b)) ? (x0 / 4) * 192 + x1 * 4 + x0 % 4
190+
: (utils::one_of(f, ib::_16a64b4a, ib::_16b64c4b)) ? (x0 / 4) * 256 + x1 * 4 + x0 % 4
179191
: (f == ib::_4b16a4b || f == ib::_4c16b4c) ? (x1 / 4) * 64 + x0 * 4 + x1 % 4
180192
: (f == ib::_4b32a4b) ? (x1 / 4) * 128 + x0 * 4 + x1 % 4
181193
: (f == ib::_4b64a4b) ? (x1 / 4) * 256 + x0 * 4 + x1 % 4
@@ -336,6 +348,18 @@ DECL_TRAITS(BA16a16b4a, _AB, _16a16b4a, 2);
336348
DECL_TRAITS(BA16a32b4a, _AB, _16a32b4a, 2);
337349
DECL_TRAITS(BA16a48b4a, _AB, _16a48b4a, 2);
338350
DECL_TRAITS(BA16a64b4a, _AB, _16a64b4a, 2);
351+
// Traits for the aCB16b* blocked tags. Every one of these tags spans three
// dimensions (a, b, c), so the last argument (number of dimensions) is 3 for
// the plain variants as well as for the 2b/4b-interleaved ones; a value of 2
// only applies to the two-dimensional BA16a* tags.
DECL_TRAITS(aCB16b16c, _BC, _16b16c, 3);
352+
DECL_TRAITS(aCB16b32c, _BC, _16b32c, 3);
353+
DECL_TRAITS(aCB16b48c, _BC, _16b48c, 3);
354+
DECL_TRAITS(aCB16b64c, _BC, _16b64c, 3);
355+
DECL_TRAITS(aCB16b16c2b, _BC, _16b16c2b, 3);
356+
DECL_TRAITS(aCB16b32c2b, _BC, _16b32c2b, 3);
357+
DECL_TRAITS(aCB16b48c2b, _BC, _16b48c2b, 3);
358+
DECL_TRAITS(aCB16b64c2b, _BC, _16b64c2b, 3);
359+
DECL_TRAITS(aCB16b16c4b, _BC, _16b16c4b, 3);
360+
DECL_TRAITS(aCB16b32c4b, _BC, _16b32c4b, 3);
361+
DECL_TRAITS(aCB16b48c4b, _BC, _16b48c4b, 3);
362+
DECL_TRAITS(aCB16b64c4b, _BC, _16b64c4b, 3);
339363

340364
DECL_TRAITS(Abc16a, _A, _16a, 3);
341365
DECL_TRAITS(ABc16a16b, _AB, _16a16b, 3);

src/cpu/reorder/cpu_reorder_comp_bf16_s8.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2022 Intel Corporation
2+
* Copyright 2020-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -79,6 +79,14 @@ const impl_list_map_t &comp_bf16_s8_impl_list_map() {
7979
DNNL_NON_X64_ONLY(REG_SR(bf16, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
8080
DNNL_NON_X64_ONLY(REG_SR(bf16, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
8181
DNNL_NON_X64_ONLY(REG_SR(bf16, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
82+
REG_SR(bf16, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
83+
REG_SR(bf16, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
84+
REG_SR(bf16, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
85+
REG_SR(bf16, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
86+
REG_SR(bf16, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
87+
REG_SR(bf16, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
88+
REG_SR(bf16, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
89+
REG_SR(bf16, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
8290
nullptr,
8391
}},
8492
{{bf16, s8, 4}, {

src/cpu/reorder/cpu_reorder_comp_f32_s8.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2022 Intel Corporation
2+
* Copyright 2020-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -71,6 +71,14 @@ const impl_list_map_t &comp_f32_s8_impl_list_map() {
7171
DNNL_NON_X64_ONLY(REG_SR(f32, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
7272
DNNL_NON_X64_ONLY(REG_SR(f32, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
7373
DNNL_NON_X64_ONLY(REG_SR(f32, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
74+
REG_SR(f32, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
75+
REG_SR(f32, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
76+
REG_SR(f32, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
77+
REG_SR(f32, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
78+
REG_SR(f32, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
79+
REG_SR(f32, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
80+
REG_SR(f32, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
81+
REG_SR(f32, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
7482
nullptr,
7583
}},
7684
{{f32, s8, 4}, {

src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2022 Intel Corporation
2+
* Copyright 2020-2023 Intel Corporation
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -72,6 +72,14 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
7272
DNNL_NON_X64_ONLY(REG_SR(s8, iwo, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
7373
DNNL_NON_X64_ONLY(REG_SR(s8, oiw, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
7474
DNNL_NON_X64_ONLY(REG_SR(s8, wio, s8, OIw16i16o4i, fmt_order::keep, spec::conv_req_comp))
75+
REG_SR(s8, abc, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
76+
REG_SR(s8, abc, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
77+
REG_SR(s8, abc, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
78+
REG_SR(s8, abc, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
79+
REG_SR(s8, acb, s8, aCB16b16c4b, fmt_order::keep, spec::conv_req_comp)
80+
REG_SR(s8, acb, s8, aCB16b32c4b, fmt_order::keep, spec::conv_req_comp)
81+
REG_SR(s8, acb, s8, aCB16b48c4b, fmt_order::keep, spec::conv_req_comp)
82+
REG_SR(s8, acb, s8, aCB16b64c4b, fmt_order::keep, spec::conv_req_comp)
7583
nullptr,
7684
}},
7785
{{s8, s8, 4}, {

0 commit comments

Comments
 (0)