Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9c659c1
fp16 code of tingqian
tiger100256-hu May 10, 2023
20068f6
update onednn
tiger100256-hu May 10, 2023
775744d
update onednn repo back to openvino/onednn
tiger100256-hu May 15, 2023
d7da7c5
Merge remote-tracking branch 'origin/master' into tq/fp16
usstq May 24, 2023
84ae48e
retrigger checks
usstq May 24, 2023
42d6b97
remove unwanted changes
usstq May 25, 2023
a56e433
code clean-up
usstq May 25, 2023
3636aaf
fix cc build
usstq May 29, 2023
d209a09
fix according to review comments
usstq Jun 1, 2023
8fab4c4
Merge remote-tracking branch 'origin/master' into tq/fp16
usstq Jun 1, 2023
87f4ee9
retrigger checks
usstq Jun 1, 2023
a58c377
fix according to review comment
usstq Jun 2, 2023
dbda7eb
address review comments
usstq Jun 20, 2023
0649ce7
Merge remote-tracking branch 'origin/master' into tq/fp16
usstq Jun 20, 2023
ed6c0c5
Merge remote-tracking branch 'origin/master' into tq/fp16
usstq Jun 20, 2023
816fa03
Merge branch 'tq/fp16' of https://github.com/usstq/openvino into tq/fp16
usstq Jun 20, 2023
c8ddf20
fix bug in EnforceInferPrcDebug
usstq Jun 21, 2023
d4341fe
Add negative pattern to EnforceInferPrcDebug
usstq Jun 21, 2023
85dc913
Fix BrgConv for f16
usstq Jun 21, 2023
ea82556
Address review comment
usstq Jun 21, 2023
3b8bf07
remove avx512_fp16 isa assert
usstq Jun 21, 2023
8a97deb
Squash onednn commits
usstq Jun 21, 2023
18866a2
MHA : unsupported precision falls back to FP32
usstq Jun 21, 2023
1a95b03
Eltwise: replace vcvtsh2ss/vcvtss2sh with vcvtph2ps/vcvtps2ph
usstq Jun 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,15 @@ Config::Config() {
}
#endif

if (!mayiuse(avx512_core_bf16))
enforceBF16 = false;
if (mayiuse(avx512_core_bf16)) {
inferencePrecision = ov::element::bf16;
} else if (mayiuse(avx512_core_amx_fp16)) {
inferencePrecision = ov::element::f16;
} else if (mayiuse(avx512_core_fp16)) {
inferencePrecision = ov::element::f16;
} else {
inferencePrecision = ov::element::f32;
}

CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());

Expand Down Expand Up @@ -183,12 +190,12 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
} else if (key == PluginConfigParams::KEY_ENFORCE_BF16) {
if (val == PluginConfigParams::YES) {
if (mayiuse(avx512_core)) {
enforceBF16 = true;
inferencePrecision = ov::element::bf16;
} else {
IE_THROW() << "Platform doesn't support BF16 format";
}
} else if (val == PluginConfigParams::NO) {
enforceBF16 = false;
inferencePrecision = ov::element::f32;
} else {
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";
Expand All @@ -197,15 +204,21 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
} else if (key == ov::hint::inference_precision.name()) {
if (val == "bf16") {
if (mayiuse(avx512_core)) {
enforceBF16 = true;
inferencePrecision = ov::element::bf16;
} else {
IE_THROW() << "Platform doesn't support BF16 format";
}
} else if (val == "f16") {
if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16)) {
inferencePrecision = ov::element::f16;
} else {
IE_THROW() << "Platform doesn't support FP16 format";
}
} else if (val == "f32") {
enforceBF16 = false;
inferencePrecision = ov::element::f32;
} else {
IE_THROW() << "Wrong value for property key " << ov::hint::inference_precision.name()
<< ". Supported values: bf16, f32";
<< ". Supported values: bf16, f16, f32";
}
inferencePrecisionSetExplicitly = true;
} else if (PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY == key) {
Expand Down Expand Up @@ -256,10 +269,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
// apply execution mode after all the params are handled to prevent possible conflicts
// when both execution_mode and inference_precision are specified
if (!inferencePrecisionSetExplicitly) {
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE && (mayiuse(avx512_core_bf16))) {
enforceBF16 = true;
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
if (mayiuse(avx512_core_bf16))
inferencePrecision = ov::element::bf16;
else if (mayiuse(avx512_core_amx_fp16) || mayiuse(avx512_core_fp16))
inferencePrecision = ov::element::f16;
else
inferencePrecision = ov::element::f32;
} else {
enforceBF16 = false;
inferencePrecision = ov::element::f32;
}
}

Expand Down Expand Up @@ -325,8 +343,7 @@ void Config::updateProperties() {
IE_SUPPRESS_DEPRECATED_START
_config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
IE_SUPPRESS_DEPRECATED_END;

if (enforceBF16) {
if (inferencePrecision == ov::element::bf16) {
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
} else {
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO });
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <openvino/runtime/properties.hpp>
#include <openvino/util/common_util.hpp>
#include "utils/debug_caps_config.h"
#include "openvino/runtime/properties.hpp"
#include <openvino/core/type/element_type.hpp>

#include <bitset>
#include <string>
Expand Down Expand Up @@ -63,11 +63,11 @@ struct Config {
bool changedHyperThreading = false;
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
LPTransformsMode lpTransformsMode = LPTransformsMode::On;
bool enforceBF16 = true;
ov::element::Type inferencePrecision = ov::element::bf16;
#else
// Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode.
LPTransformsMode lpTransformsMode = LPTransformsMode::Off;
bool enforceBF16 = false;
ov::element::Type inferencePrecision = ov::element::f32;
#endif
bool inferencePrecisionSetExplicitly = false;
ov::hint::ExecutionMode executionMode = ov::hint::ExecutionMode::PERFORMANCE;
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
return 4;
case dnnl::memory::data_type::bf16:
return 2;
case dnnl::memory::data_type::f16:
return 2;
case dnnl::memory::data_type::s8:
return 1;
case dnnl::memory::data_type::u8:
Expand All @@ -47,6 +49,8 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
return memory::data_type::s32;
case InferenceEngine::Precision::BF16:
return memory::data_type::bf16;
case InferenceEngine::Precision::FP16:
return memory::data_type::f16;
case InferenceEngine::Precision::I8:
return memory::data_type::s8;
case InferenceEngine::Precision::U8:
Expand All @@ -70,6 +74,8 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
return InferenceEngine::Precision::I32;
case memory::data_type::bf16:
return InferenceEngine::Precision::BF16;
case memory::data_type::f16:
return InferenceEngine::Precision::FP16;
case memory::data_type::s8:
return InferenceEngine::Precision::I8;
case memory::data_type::u8:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ void jit_convert_truncation_emitter::emit_isa(const std::vector<size_t> &in_vec_
if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8))
h->uni_vcvttps2dq(vmm_dst, vmm_dst);
break;
case ov::element::f16:
// to be exact, vcvtph2ps belongs to AVX512VL/AVX512F
assert(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16));
h->vcvtph2ps(vmm_dst, Ymm(vmm_src.getIdx()));
break;
case ov::element::i8:
h->uni_vpmovsxbd(vmm_dst, vmm_src);
break;
Expand Down Expand Up @@ -222,6 +227,11 @@ void jit_convert_saturation_emitter::emit_isa(const std::vector<size_t> &in_vec_
if (one_of(output_type, ov::element::i32, ov::element::i8, ov::element::u8))
h->uni_vcvttps2dq(vmm_dst, vmm_dst);
break;
case ov::element::f16:
// to be exact, vcvtph2ps belongs to AVX512VL/AVX512F
assert(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16));
h->vcvtph2ps(vmm_dst, Ymm(vmm_src.getIdx()));
break;
case ov::element::i8:
h->uni_vpmovsxbd(vmm_dst, vmm_src);
break;
Expand All @@ -234,7 +244,7 @@ void jit_convert_saturation_emitter::emit_isa(const std::vector<size_t> &in_vec_

switch (output_type) {
case ov::element::f32:
if (!one_of(input_type, ov::element::i32, ov::element::bf16)) {
if (!one_of(input_type, ov::element::i32, ov::element::bf16, ov::element::f16)) {
h->uni_vcvtdq2ps(vmm_dst, vmm_dst);
}
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class jit_convert_emitter : public jit_emitter {
ov::element::f32,
ov::element::i32,
ov::element::bf16,
ov::element::f16,
ov::element::i8,
ov::element::u8
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,10 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 &reg_src, const int out_vec_i
load_bytes_to_dword_extension<Vmm>(Vmm(out_vec_idx), reg_src, offset, false, load_size_);
break;
case Precision::I16:
load_words_to_dword_extension<Vmm>(Vmm(out_vec_idx), reg_src, offset, false, true, load_size_);
break;
case Precision::U16:
load_words_to_dword_extension<Vmm>(Vmm(out_vec_idx), reg_src, offset, false, false, load_size_);
break;
case Precision::BF16:
load_words_to_dword_extension<Vmm>(Vmm(out_vec_idx), reg_src, offset, true, false, load_size_);
case Precision::FP16:
load_words_to_dword_extension<Vmm>(Vmm(out_vec_idx), reg_src, offset, src_prc_, load_size_);
break;
default:
IE_THROW() << "Load emitter in " << name_ << " has unsupported src precision to load.";
Expand All @@ -155,11 +152,11 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 &reg_src, const int out_vec_i
if (src_prc_ != dst_prc_) {
switch (dst_prc_) {
case Precision::FP32:
if ((src_prc_ != Precision::FP32) && (src_prc_ != Precision::BF16))
if ((src_prc_ != Precision::FP32) && (src_prc_ != Precision::BF16) && (src_prc_ != Precision::FP16))
h->uni_vcvtdq2ps(Vmm(out_vec_idx), Vmm(out_vec_idx));
break;
case Precision::I32:
if ((src_prc_ == Precision::FP32) || (src_prc_ == Precision::BF16)) {
if ((src_prc_ == Precision::FP32) || (src_prc_ == Precision::BF16) || (src_prc_ == Precision::FP16)) {
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
}
break;
Expand Down Expand Up @@ -447,7 +444,7 @@ void jit_load_emitter::load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak
* [0.. 32] for ZMM version of the function. i.e. 16 words -> 16 * 32 bit == 512 bit
*/
template <typename Vmm>
void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size) const {
void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, InferenceEngine::Precision prc, int load_size) const {
constexpr bool is_xmm = std::is_same<Vmm, Xbyak::Xmm>::value;
constexpr bool is_ymm = std::is_same<Vmm, Xbyak::Ymm>::value;
constexpr bool is_zmm = std::is_same<Vmm, Xbyak::Zmm>::value;
Expand All @@ -456,6 +453,13 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
MAYBE_UNUSED(is_ymm);
MAYBE_UNUSED(is_zmm);

bool is_bf16 = (prc == Precision::BF16);
bool is_f16 = (prc == Precision::FP16);
bool is_signed = (prc == Precision::I16);

if (is_f16 && !mayiuse(cpu::x64::avx512_core_fp16))
IE_THROW() << "Load emitter in " << name_ << " only support fp16 on platform with avx512_core_fp16.";

// Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512)
// For Ymm register, load capacity is halved (16/2(num) * 32 <= 128)
// For Xmm register, load capacity is halved again (8/2(num) * 32 <= 128)
Expand All @@ -477,6 +481,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
if (is_bf16) {
h->uni_vpmovzxwd(zmm, ptr[reg + offset]);
h->uni_vpslld(zmm, zmm, 16);
} else if (is_f16) {
h->vcvtph2ps(zmm, ptr[reg + offset]);
} else {
if (is_signed)
h->uni_vpmovsxwd(zmm, ptr[reg + offset]);
Expand All @@ -489,6 +495,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
if (is_bf16) {
h->uni_vpmovzxwd(ymm, ptr[reg + offset]);
h->uni_vpslld(ymm, ymm, 16);
} else if (is_f16) {
h->vcvtph2ps(ymm, ptr[reg + offset]);
} else {
if (is_signed)
h->uni_vpmovsxwd(ymm, ptr[reg + offset]);
Expand All @@ -501,6 +509,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
if (is_bf16) {
h->uni_vpmovzxwd(xmm, ptr[reg + offset]);
h->uni_vpslld(xmm, xmm, 16);
} else if (is_f16) {
h->vcvtph2ps(xmm, ptr[reg + offset]);
} else {
if (is_signed)
h->uni_vpmovsxwd(xmm, ptr[reg + offset]);
Expand All @@ -518,6 +528,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
if (is_bf16) {
h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]);
h->uni_vpslld(vmm, vmm, 16);
} else if (is_f16) {
h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]);
} else {
if (is_signed)
h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]);
Expand All @@ -530,6 +542,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
if (is_bf16) {
h->uni_vpmovzxwd(vmm, xmm);
h->uni_vpslld(vmm, vmm, 16);
} else if (is_f16) {
h->vcvtph2ps(ymm, xmm);
} else {
if (is_signed)
h->uni_vpmovsxwd(vmm, xmm);
Expand Down Expand Up @@ -665,7 +679,7 @@ void jit_store_emitter::emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_d
if (src_prc_ != dst_prc_) {
switch (src_prc_) {
case Precision::FP32:
if ((dst_prc_ != Precision::FP32) && (dst_prc_ != Precision::BF16)) {
if ((dst_prc_ != Precision::FP32) && (dst_prc_ != Precision::BF16) && (dst_prc_ != Precision::FP16)) {
if (is_saturation()) {
h->uni_vcvtps2dq(Vmm(aux_src_idx), Vmm(data_idx));
} else {
Expand All @@ -676,7 +690,7 @@ void jit_store_emitter::emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_d
}
break;
case Precision::I32:
if ((dst_prc_ == Precision::FP32) || (dst_prc_ == Precision::BF16)) {
if ((dst_prc_ == Precision::FP32) || (dst_prc_ == Precision::BF16) || (dst_prc_ == Precision::FP16)) {
h->uni_vcvtdq2ps(Vmm(aux_src_idx), Vmm(data_idx));
data_idx = aux_src_idx;
data_reg_updated = true;
Expand All @@ -702,13 +716,10 @@ void jit_store_emitter::emit_isa(const int in_vec_idx, const Xbyak::Reg64 &reg_d
store_dword_to_byte_extension<Vmm>(reg_dst, offset, false, store_num_);
break;
case Precision::I16:
store_dword_to_word_extension<Vmm>(reg_dst, offset, false, true, store_num_);
break;
case Precision::U16:
store_dword_to_word_extension<Vmm>(reg_dst, offset, false, false, store_num_);
break;
case Precision::BF16:
store_dword_to_word_extension<Vmm>(reg_dst, offset, true, false, store_num_);
case Precision::FP16:
store_dword_to_word_extension<Vmm>(reg_dst, offset, dst_prc_, store_num_);
break;
default:
IE_THROW() << "Store emitter in " << name_ << " has unsupported dst precision to store.";
Expand Down Expand Up @@ -1037,7 +1048,11 @@ void jit_store_emitter::store_dword_to_byte_extension(const Xbyak::Reg64 &reg, i
*/
template <typename Vmm>
void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 &reg,
int offset, bool is_bf16, bool is_signed, int store_num) const {
int offset, InferenceEngine::Precision precision, int store_num) const {
const bool is_bf16 = (precision == Precision::BF16);
const bool is_f16 = (precision == Precision::FP16);
const bool is_signed = (precision == Precision::I16);

constexpr bool is_xmm = std::is_same<Vmm, Xbyak::Xmm>::value;
constexpr bool is_ymm = std::is_same<Vmm, Xbyak::Ymm>::value;
constexpr bool is_zmm = std::is_same<Vmm, Xbyak::Zmm>::value;
Expand Down Expand Up @@ -1135,6 +1150,22 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 &reg,
data_idx = static_cast<int>(xmm.getIdx());
store_bytes<Vmm>(reg, offset, store_num * 2);
}
} else if (is_f16) {
if (!mayiuse(cpu::x64::avx512_core_fp16))
IE_THROW() << "Store emitter in " << name_ << " only support fp16 on platform with avx512_core_fp16.";
// to avoid src vmm pollution
if (src_prc_ == Precision::FP32) {
// since avx512, zmm(fp32) => ymm(fp16)
ymm = Ymm(aux_vec_idxs[0]);
} // in I32 case, zmm&ymm is already in aux reg

h->vcvtps2ph(ymm, zmm, 0x4);
if (store_num == 16) {
h->vmovdqu16(ptr[reg + offset], ymm);
} else {
data_idx = static_cast<int>(ymm.getIdx());
store_bytes<Vmm>(reg, offset, store_num * 2);
}
} else {
switch (store_num) {
case 16:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class jit_load_emitter : public jit_emitter {
void load_bytes_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_signed, int load_size) const;

template <typename Vmm>
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int load_size) const;
void load_words_to_dword_extension(const Vmm &vmm, const Xbyak::Reg64 &reg, int offset, InferenceEngine::Precision prc, int load_size) const;

template <typename Vmm>
void fill_with_default(const Vmm &vmm, std::string fill_value, const int &load_num) const;
Expand Down Expand Up @@ -145,7 +145,7 @@ class jit_store_emitter : public jit_emitter {
void store_dword_to_byte_extension(const Xbyak::Reg64 &reg, int offset, bool is_signed, int store_size) const;

template <typename Vmm>
void store_dword_to_word_extension(const Xbyak::Reg64 &reg, int offset, bool is_bf16, bool is_signed, int store_size) const;
void store_dword_to_word_extension(const Xbyak::Reg64 &reg, int offset, InferenceEngine::Precision precision, int store_size) const;

void register_table_entries() override;

Expand Down
4 changes: 1 addition & 3 deletions src/plugins/intel_cpu/src/exec_network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,7 @@ InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const
const bool perfCount = config.collectPerfCounters;
return decltype(ov::enable_profiling)::value_type(perfCount);
} else if (name == ov::hint::inference_precision) {
const auto enforceBF16 = config.enforceBF16;
const auto inference_precision = enforceBF16 ? ov::element::bf16 : ov::element::f32;
return decltype(ov::hint::inference_precision)::value_type(inference_precision);
return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
} else if (name == ov::hint::performance_mode) {
const auto perfHint = ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode);
return perfHint;
Expand Down
Loading