Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/cpu/cpu_convolution_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,40 @@ const std::map<pk_dt_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map()
CPU_INSTANCE(ref_fused_convolution_fwd_t)
nullptr,
}},
{{forward, f16, f16, f32}, {
CPU_INSTANCE_AVX512(brdgmm_dw_convolution_fwd_t)
CPU_INSTANCE_X64(ip_convolution_fwd_t)
CPU_INSTANCE_AMX(brgemm_1x1_convolution_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_convolution_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_convolution_fwd_t, avx512_core_amx_fp16, true)
CPU_INSTANCE_AVX512(brgemm_1x1_convolution_fwd_t, avx512_core_fp16)
CPU_INSTANCE_AVX512(brgemm_convolution_fwd_t, avx512_core_fp16)
CPU_INSTANCE_AVX512(brgemm_convolution_fwd_t, avx512_core_fp16, true)
CPU_INSTANCE_AVX2(brgemm_1x1_convolution_fwd_t, avx2_vnni_2)
CPU_INSTANCE_AVX2(brgemm_convolution_fwd_t, avx2_vnni_2)
CPU_INSTANCE_AVX2(brgemm_convolution_fwd_t, avx2_vnni_2, true)
CPU_INSTANCE(ref_convolution_fwd_t)
nullptr,
}},
{{forward, f16, f16, f16}, {
CPU_INSTANCE_AVX512(brdgmm_dw_convolution_fwd_t)
CPU_INSTANCE_X64(ip_convolution_fwd_t)
CPU_INSTANCE_AMX(brgemm_1x1_convolution_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_convolution_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_convolution_fwd_t, avx512_core_amx_fp16, true)
CPU_INSTANCE_AVX512(brgemm_1x1_convolution_fwd_t, avx512_core_fp16)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering: do we really need avx512_core_fp16 and avx2_vnni_2? Are we going to use them?

CPU_INSTANCE_AVX512(brgemm_convolution_fwd_t, avx512_core_fp16)
CPU_INSTANCE_AVX512(brgemm_convolution_fwd_t, avx512_core_fp16, true)
CPU_INSTANCE_AVX2(brgemm_1x1_convolution_fwd_t, avx2_vnni_2)
CPU_INSTANCE_AVX2(brgemm_convolution_fwd_t, avx2_vnni_2)
CPU_INSTANCE_AVX2(brgemm_convolution_fwd_t, avx2_vnni_2, true)
CPU_INSTANCE_ACL(acl_wino_convolution_fwd_t)
CPU_INSTANCE_ACL(acl_indirect_gemm_convolution_fwd_t)
CPU_INSTANCE_ACL(acl_gemm_convolution_fwd_t, f16)
CPU_INSTANCE(ref_convolution_fwd_t)
CPU_INSTANCE(ref_fused_convolution_fwd_t)
nullptr,
}},
// BWD_D fp
{{backward_data, f32, f32, f32}, REG_CONV_P({
CPU_INSTANCE_X64(ip_convolution_bwd_data_t)
Expand Down Expand Up @@ -220,6 +254,22 @@ const std::map<pk_dt_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map()
CPU_INSTANCE(ref_convolution_bwd_data_t)
nullptr,
})},
{{backward_data, f32, f16, f16}, REG_BWD_D_PK({
CPU_INSTANCE_X64(ip_convolution_bwd_data_t)
CPU_INSTANCE_AMX(brgemm_convolution_bwd_strided_t<avx512_core_amx_fp16>)
CPU_INSTANCE_AVX512(brgemm_convolution_bwd_t<avx512_core_fp16>)
CPU_INSTANCE_AVX2(brgemm_convolution_bwd_t<avx2_vnni_2>)
CPU_INSTANCE(ref_convolution_bwd_data_t)
nullptr,
})},
{{backward_data, f16, f16, f16}, REG_BWD_D_PK({
CPU_INSTANCE_X64(ip_convolution_bwd_data_t)
CPU_INSTANCE_AMX(brgemm_convolution_bwd_strided_t<avx512_core_amx_fp16>)
CPU_INSTANCE_AVX512(brgemm_convolution_bwd_t<avx512_core_fp16>)
CPU_INSTANCE_AVX2(brgemm_convolution_bwd_t<avx2_vnni_2>)
CPU_INSTANCE(ref_convolution_bwd_data_t)
nullptr,
})},
// BWD_W fp
{{backward_weights, f32, f32, f32}, REG_BWD_PK({
CPU_INSTANCE_X64(ip_convolution_bwd_weights_t)
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/cpu_deconvolution_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ using namespace dnnl::impl::prop_kind;
const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map() {
static const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> the_map = REG_DECONV_P({
{{forward}, {
CPU_INSTANCE_AMX(brgemm_deconvolution_fwd_t<avx512_core_amx_fp16>)
CPU_INSTANCE_AMX(brgemm_deconvolution_fwd_t<avx512_core_amx>)
CPU_INSTANCE_AMX(brgemm_deconvolution_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_deconvolution_fwd_t, avx512_core_amx)
CPU_INSTANCE_AMX(jit_avx512_core_amx_deconvolution_fwd_t)
CPU_INSTANCE_AVX512(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t)
CPU_INSTANCE_AVX512(jit_avx512_core_x8s8s32x_deconvolution_fwd_t)
Expand Down
15 changes: 15 additions & 0 deletions src/cpu/cpu_inner_product_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ const std::map<pk_dt_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map()
CPU_INSTANCE(ref_inner_product_fwd_t)
nullptr,
}},
{{forward, f16, f16, f32}, {
CPU_INSTANCE_AMX(brgemm_inner_product_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx512_core_fp16)
CPU_INSTANCE_AVX2(brgemm_inner_product_fwd_t, avx2_vnni_2)
CPU_INSTANCE(ref_inner_product_fwd_t)
nullptr,
}},
{{forward, f16, f16, f16}, {
CPU_INSTANCE_AMX(brgemm_inner_product_fwd_t, avx512_core_amx_fp16)
CPU_INSTANCE_AVX512(brgemm_inner_product_fwd_t, avx512_core_fp16)
CPU_INSTANCE_AVX2(brgemm_inner_product_fwd_t, avx2_vnni_2)
CPU_INSTANCE_ACL(acl_inner_product_fwd_t)
CPU_INSTANCE(ref_inner_product_fwd_t)
nullptr,
}},
{{backward_data, f32, f32, f32}, REG_BWD_PK({
CPU_INSTANCE_AMX(brgemm_inner_product_bwd_data_t, avx512_core_amx) // bf32
CPU_INSTANCE_AVX512(brgemm_inner_product_bwd_data_t, avx512_core)
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/cpu_pooling_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,11 @@ const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map() {
static const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> the_map = REG_POOLING_P({
{{forward}, {
/* fp */
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx512_core_fp16, f16)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx512_core, bf16)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx512_core, f32)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx2_vnni_2, bf16)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx2_vnni_2, f16)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx2, f32)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, avx, f32)
CPU_INSTANCE_X64(jit_uni_pooling_fwd_t, sse41, f32)
Expand Down
3 changes: 2 additions & 1 deletion src/cpu/matmul/cpu_matmul_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ using namespace dnnl::impl::cpu::matmul;
// constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({
const impl_list_item_t impl_list[] = REG_MATMUL_P({
CPU_INSTANCE_ACL(acl_matmul_t)
CPU_INSTANCE_AMX(brgemm_matmul_t, avx512_core_amx_fp16)
CPU_INSTANCE_AMX(brgemm_matmul_t, avx512_core_amx)
CPU_INSTANCE_AVX512(brgemm_matmul_t, avx512_core)
CPU_INSTANCE(gemm_f32_matmul_t)
Expand All @@ -78,7 +79,7 @@ const impl_list_item_t impl_list[] = REG_MATMUL_P({
CPU_INSTANCE_AVX2(brgemm_matmul_t, avx2_vnni_2)
CPU_INSTANCE_AVX2(brgemm_matmul_t, avx2_vnni)
CPU_INSTANCE(gemm_x8s8s32x_matmul_t)
CPU_INSTANCE_AVX512(brgemm_matmul_t<avx512_core_fp16>)
CPU_INSTANCE_AVX512(brgemm_matmul_t, avx512_core_fp16)
CPU_INSTANCE(ref_matmul_t)
CPU_INSTANCE(ref_matmul_int8_t)
// These implementations are enabled only when DNNL_EXPERIMENTAL_SPARSE
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/reorder/cpu_reorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ static const std::map<reorder_impl_key_t, const void *> &
regular_impl_list_map() {
static const std::map<reorder_impl_key_t, const void *> the_map = {
{{f32, bf16, 0}, &regular_f32_bf16_impl_list_map()},
{{f32, f16, 0}, &regular_f32_f16_impl_list_map()},
{{f32, f32, 0}, &regular_f32_f32_impl_list_map()},
{{f32, s32, 0}, &regular_f32_s32_impl_list_map()},
{{f32, s8, 0}, &regular_f32_s8_impl_list_map()},
{{f32, u8, 0}, &regular_f32_u8_impl_list_map()},
{{f32, bin, 0}, &regular_f32_bin_impl_list_map()},
{{bf16, data_type::undef, 0}, &regular_bf16_impl_list_map()},
{{f16, data_type::undef, 0}, &regular_f16_impl_list_map()},
{{s32, data_type::undef, 0}, &regular_s32_impl_list_map()},
{{s8, data_type::undef, 0}, &regular_s8_impl_list_map()},
{{u8, data_type::undef, 0}, &regular_u8_impl_list_map()},
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/reorder/cpu_reorder_regular_f16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ const impl_list_map_t &regular_f16_impl_list_map() {
// f16 ->
{{f16, data_type::undef, 0}, {
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::brgemm_matmul_matrix_B_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))
REG_SR(f16, any, f16, any, fmt_order_any, spec_reference)
REG_SR(f16, any, f32, any, fmt_order_any, spec_reference)
REG_SR(f16, any, s8, any, fmt_order_any, spec_reference)
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/reorder/cpu_reorder_regular_f32_f16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ const impl_list_map_t &regular_f32_f16_impl_list_map() {
static const impl_list_map_t the_map = REG_REORDER_P({
// f32 -> f16
{{f32, f16, 0}, {
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_blk_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_blk_reorder_t))
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64_jit_uni_reorder_t))

nullptr,
}},
Expand Down
1 change: 1 addition & 0 deletions src/cpu/x64/jit_uni_pool_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1638,6 +1638,7 @@ template struct jit_uni_pool_kernel<avx>;
template struct jit_uni_pool_kernel<avx2>;
template struct jit_uni_pool_kernel<avx2_vnni_2>;
template struct jit_uni_pool_kernel<avx512_core>;
template struct jit_uni_pool_kernel<avx512_core_fp16>;

} // namespace x64
} // namespace cpu
Expand Down
1 change: 1 addition & 0 deletions src/cpu/x64/jit_uni_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1279,6 +1279,7 @@ template struct jit_uni_pooling_fwd_t<avx512_core, data_type::f32>;
template struct jit_uni_pooling_bwd_t<avx512_core, data_type::f32>;
template struct jit_uni_pooling_fwd_t<avx512_core, data_type::bf16>;
template struct jit_uni_pooling_bwd_t<avx512_core, data_type::bf16>;
template struct jit_uni_pooling_fwd_t<avx512_core_fp16, data_type::f16>;

} // namespace x64
} // namespace cpu
Expand Down