Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions src/plugins/intel_cpu/src/nodes/eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,16 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
}
outputPrecision = filterPrecision(outputPrecision, forcedPrec);
} else {
#endif
#if defined(OV_CPU_WITH_SHL)
if (ShlEltwiseExecutor::isEltwiseAlgorithmSupported(getAlgorithm())) {
// SHL implementation supports only identical precisions on inputs/outputs and only FP32 for now
const ov::element::Type forcedPrec = ov::element::f32;
for (size_t i = 0; i < inputPrecisions.size(); i++) {
inputPrecisions[i] = forcedPrec;
}
outputPrecision = forcedPrec;
} else {
#endif
auto filterPrecision = [&](const ov::element::Type& prc) {
if (implType == EltwiseImplType::reference) {
Expand Down Expand Up @@ -2344,6 +2354,9 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
inputPrecisions[i] = filterPrecision(inputPrecisions[i]);
}
outputPrecision = filterPrecision(outputPrecision);
#if defined(OV_CPU_WITH_SHL)
}
#endif
#if defined(OV_CPU_WITH_ACL)
}
#endif
Expand All @@ -2364,7 +2377,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
Blocked
};

auto initDesc = [&] (LayoutType lt, const bool useAclExecutor = false, const bool useJit = false) -> NodeDesc {
auto initDesc = [&] (LayoutType lt, const bool useEltwiseExecutor = false, const bool useJit = false) -> NodeDesc {
auto createMemoryDesc = [lt](const Shape &shape, ov::element::Type prc, size_t offset) -> std::shared_ptr<CpuBlockedMemoryDesc> {
const auto &dims = shape.getDims();
if (lt == ChannelsFirst && shape.getRank() != 1) {
Expand Down Expand Up @@ -2438,7 +2451,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {

config.outConfs.push_back(portConfig);

if (useAclExecutor || useJit) {
if (useEltwiseExecutor || useJit) {
impl_desc_type impl_type;
#if defined (OPENVINO_ARCH_ARM64)
if (useJit) {
Expand Down Expand Up @@ -2512,7 +2525,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
inputNum = getParentEdges().size();
currentInBlkDims.resize(inputNum);

#if defined (OV_CPU_WITH_ACL)
#if defined(OV_CPU_WITH_ACL)
if (useAcl || useJit) {
eltwiseAttrs = {algorithm, alpha, beta, gamma};

Expand All @@ -2533,12 +2546,28 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
}

canUseAclExecutor = !supportedPrimitiveDescriptors.empty() && !useJit;
canUseEltwiseExecPtr = !supportedPrimitiveDescriptors.empty() && !useJit;
if (!supportedPrimitiveDescriptors.empty())
return;
}
#endif

#if defined(OV_CPU_WITH_SHL)
eltwiseAttrs = {algorithm, alpha, beta, gamma};

auto addDesc = [&initDesc](std::vector<NodeDesc>& supportedPrimitiveDescriptors, const LayoutType layoutType) {
auto nodeDesc = initDesc(layoutType, true, false);
if (nodeDesc.getExecutorFactory())
supportedPrimitiveDescriptors.emplace_back(nodeDesc);
};

addDesc(supportedPrimitiveDescriptors, Planar);

canUseEltwiseExecPtr = !supportedPrimitiveDescriptors.empty();
if (!supportedPrimitiveDescriptors.empty())
return;
#endif

if (isChannelsFirstApplicable)
supportedPrimitiveDescriptors.emplace_back(initDesc(ChannelsFirst));
if (isBlockedApplicable)
Expand Down Expand Up @@ -2570,7 +2599,7 @@ void Eltwise::createPrimitive() {
}

void Eltwise::prepareParams() {
if (canUseAclExecutor) {
if (canUseEltwiseExecPtr) {
std::vector<MemoryDescPtr> srcMemoryDescs;
for (size_t i = 0; i < getParentEdges().size(); i++) {
srcMemoryDescs.push_back(getSrcMemoryAtPort(i)->getDescPtr());
Expand All @@ -2579,8 +2608,8 @@ void Eltwise::prepareParams() {
dstMemoryDescs.push_back(getDstMemoryAtPort(0)->getDescPtr());

auto selectedPD = getSelectedPrimitiveDescriptor();
aclExecPtr = selectedPD->getExecutorFactoryAs<EltwiseExecutorFactory>()->makeExecutor(eltwiseAttrs, srcMemoryDescs, dstMemoryDescs, {});
selectedPD->setImplementationType(aclExecPtr->getImplType());
eltwiseExecPtr = selectedPD->getExecutorFactoryAs<EltwiseExecutorFactory>()->makeExecutor(eltwiseAttrs, srcMemoryDescs, dstMemoryDescs, {});
selectedPD->setImplementationType(eltwiseExecPtr->getImplType());

return;
}
Expand Down Expand Up @@ -2748,15 +2777,15 @@ void Eltwise::execute(dnnl::stream strm) {
args_ptrs.dst_offsets = execParams.outOffsets.data();
}
execPtr->exec(args_ptrs, dims_out);
} else if (aclExecPtr) {
} else if (eltwiseExecPtr) {
std::vector<MemoryCPtr> srcMemory;
for (size_t i = 0; i < getParentEdges().size(); i++) {
srcMemory.push_back(getSrcMemoryAtPort(i));
}
std::vector<MemoryPtr> dstMemory;
dstMemory.push_back(getDstMemoryAtPort(0));

aclExecPtr->exec(srcMemory, dstMemory, fqDataPtrs.data());
eltwiseExecPtr->exec(srcMemory, dstMemory, fqDataPtrs.data());
} else {
OPENVINO_THROW("Can't execute eltwise node with name: ", getName(), ". Primitive isn't created");
}
Expand Down Expand Up @@ -3154,7 +3183,6 @@ ov::element::Type Eltwise::getRuntimePrecision() const {

return getMaxPrecision(inputPrecisions);
}

} // namespace node
} // namespace intel_cpu
} // namespace ov
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/nodes/eltwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,9 @@ class Eltwise : public Node {
void appendMemory(const std::vector<float> &data, MemoryPtr &memPtr, std::vector<MemoryPtr>& postOpsMem);
void appendMemory(const std::vector<float> &data, MemoryPtr &memPtr, std::vector<const void*>& postOpsMem);

bool canUseAclExecutor = false;
bool canUseEltwiseExecPtr = false;
EltwiseAttrs eltwiseAttrs;
std::shared_ptr<EltwiseExecutor> aclExecPtr = nullptr;
std::shared_ptr<EltwiseExecutor> eltwiseExecPtr = nullptr;
};

class eltwise_precision_helper {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace intel_cpu {
const std::vector<EltwiseExecutorDesc>& getEltwiseExecutorsList() {
static std::vector<EltwiseExecutorDesc> descs = {
OV_CPU_INSTANCE_ACL(ExecutorType::Acl, std::make_shared<AclEltwiseExecutorBuilder>())
OV_CPU_INSTANCE_SHL(ExecutorType::Shl, std::make_shared<ShlEltwiseExecutorBuilder>())
};

return descs;
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/eltwise_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "aarch64/jit_eltwise.hpp"
#include "acl/acl_eltwise.hpp"
#endif
#if defined(OV_CPU_WITH_SHL)
#include "shl/shl_eltwise.hpp"
#endif

#include "onednn/iml_type_mapper.h"
#include "common/primitive_cache.hpp"
Expand Down
128 changes: 118 additions & 10 deletions src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,42 @@ struct ShlTensor : public ShlStructure<csinn_tensor*> {
OPENVINO_ASSERT(get()->dim_count < MAX_DIM, "Shl supports shapes with rank less or equal to 8");
for (int i = 0; i < get()->dim_count; ++i)
get()->dim[i] = static_cast<int32_t>(shape[i]);
};
};

// Abstract interface shared by the SHL parameter wrappers.
// It lets callers hold any wrapper through one type and read the wrapped
// parameter object back as an untyped pointer.
struct IShlParams {
    virtual ~IShlParams() = default;

    // Returns the wrapped parameter object as an untyped pointer.
    // `allow_empty` is passed through to the implementation.
    // NOTE(review): presumably it permits a null result - confirm against ShlStructure::get.
    virtual void* get(bool allow_empty = false) const = 0;
};

// Generic wrapper around an SHL parameter structure.
//
// T is the pointer type of the wrapped structure (e.g. csinn_fc_params*).
// `traits` is forwarded to the ShlStructure base, so the base that stores the
// pointer and the base named in get() are always the same type. Previously the
// class derived from ShlStructure<T> while get() named ShlStructure<T, traits>,
// which only matched when `traits` was left at its default.
//
// Every constructor allocates the structure with csinn_alloc_params, asserts
// that the allocation succeeded and hands the pointer to the base via reset().
template <typename T, typename traits = ShlStructureTraits<T>>
struct ShlParams : public ShlStructure<T, traits>, public IShlParams {
    // Allocates the parameters without binding them to a session (nullptr is passed to SHL).
    ShlParams() {
        T params = static_cast<T>(csinn_alloc_params(sizeof(params_type), nullptr));
        OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_params");
        this->reset(params);
    }

    // Allocates the parameters, passing the given session's handle to SHL.
    ShlParams(const ShlSession& session) {
        T params = static_cast<T>(csinn_alloc_params(sizeof(params_type), session.get()));
        OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_params");
        this->reset(params);
    }

    // Same as the session constructor, then stores `api` in the structure's base.api field.
    ShlParams(const ShlSession& session, csinn_api_enum api) : ShlParams(session) {
        setAPI(api);
    }

    // Type-erased accessor required by IShlParams; forwards to the typed getter of the base.
    void* get(bool allow_empty = false) const override {
        return this->ShlStructure<T, traits>::get(allow_empty);
    }

private:
    // The structure type T points to.
    using params_type = typename std::remove_pointer<T>::type;

    // Writes the API selector into the common `base` header of the wrapped structure.
    void setAPI(csinn_api_enum api) {
        auto params = static_cast<params_type*>(this->get());
        params->base.api = api;
    }
};

Expand All @@ -172,18 +208,90 @@ struct ShlStructureTraits<csinn_fc_params*> {
return csinn_free_params(p);
}
};
struct ShlFCParams : public ShlStructure<csinn_fc_params*> {
ShlFCParams() {
csinn_fc_params* params = static_cast<csinn_fc_params*>(csinn_alloc_params(sizeof(csinn_fc_params), nullptr));
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_fc_params");
reset(params);
// Wrapper for csinn_fc_params built on the generic ShlParams template.
struct ShlFCParams : public ShlParams<csinn_fc_params*> {
// Adds no members of its own: all constructors are inherited from ShlParams.
using ShlParams<csinn_fc_params*>::ShlParams;
};

// Traits for csinn_diso_params: destructor() forwards the pointer to csinn_free_params.
template <>
struct ShlStructureTraits<csinn_diso_params*> {
    static void destructor(csinn_diso_params* p) {
        csinn_free_params(p);
    }
};

// Wrapper for csinn_diso_params; every constructor is inherited from ShlParams.
struct ShlDisoParams : public ShlParams<csinn_diso_params*> {
    using ShlParams<csinn_diso_params*>::ShlParams;
};

ShlFCParams(const ShlSession& session, csinn_api_enum api) {
csinn_fc_params* params = static_cast<csinn_fc_params*>(csinn_alloc_params(sizeof(csinn_fc_params), session.get()));
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_fc_params");
params->base.api = api;
reset(params);
// Traits for csinn_siso_params: destructor() forwards the pointer to csinn_free_params.
template <>
struct ShlStructureTraits<csinn_siso_params*> {
    static void destructor(csinn_siso_params* p) {
        csinn_free_params(p);
    }
};

// Wrapper for csinn_siso_params; every constructor is inherited from ShlParams.
struct ShlSisoParams : public ShlParams<csinn_siso_params*> {
    using ShlParams<csinn_siso_params*>::ShlParams;
};

// Traits for csinn_relu_params: destructor() forwards the pointer to csinn_free_params.
template <>
struct ShlStructureTraits<csinn_relu_params*> {
    static void destructor(csinn_relu_params* p) {
        csinn_free_params(p);
    }
};

// Wrapper for csinn_relu_params.
// Besides the constructors inherited from ShlParams, each alpha-taking
// overload mirrors one of them and additionally stores `alpha` in the `n` field.
struct ShlReluParams : public ShlParams<csinn_relu_params*> {
    using ShlParams<csinn_relu_params*>::ShlParams;

    // No session, alpha stored in `n`.
    ShlReluParams(float alpha) : ShlParams<csinn_relu_params*>() {
        storeAlpha(alpha);
    }

    // Session-bound allocation, alpha stored in `n`.
    ShlReluParams(const ShlSession& session, float alpha) : ShlParams<csinn_relu_params*>(session) {
        storeAlpha(alpha);
    }

    // Session-bound allocation with an explicit API selector, alpha stored in `n`.
    ShlReluParams(const ShlSession& session, csinn_api_enum api, float alpha) : ShlParams<csinn_relu_params*>(session, api) {
        storeAlpha(alpha);
    }

private:
    // Writes alpha into the `n` field of the wrapped structure.
    void storeAlpha(float alpha) {
        auto relu = static_cast<csinn_relu_params*>(this->get());
        relu->n = alpha;
    }
};

// Traits for csinn_prelu_params: destructor() forwards the pointer to csinn_free_params.
template <>
struct ShlStructureTraits<csinn_prelu_params*> {
    static void destructor(csinn_prelu_params* p) {
        csinn_free_params(p);
    }
};

// Wrapper for csinn_prelu_params; every constructor is inherited from ShlParams.
struct ShlPReluParams : public ShlParams<csinn_prelu_params*> {
    using ShlParams<csinn_prelu_params*>::ShlParams;
};

// Traits for csinn_clip_params: destructor() forwards the pointer to csinn_free_params.
template <>
struct ShlStructureTraits<csinn_clip_params*> {
    static void destructor(csinn_clip_params* p) {
        csinn_free_params(p);
    }
};

// Wrapper for csinn_clip_params.
// Besides the constructors inherited from ShlParams, each (min, max)-taking
// overload mirrors one of them and additionally fills min_value / max_value.
struct ShlClipParams : public ShlParams<csinn_clip_params*> {
    using ShlParams<csinn_clip_params*>::ShlParams;

    // No session, bounds stored in min_value / max_value.
    ShlClipParams(float min, float max) : ShlParams<csinn_clip_params*>() {
        storeBounds(min, max);
    }

    // Session-bound allocation, bounds stored in min_value / max_value.
    ShlClipParams(const ShlSession& session, float min, float max) : ShlParams<csinn_clip_params*>(session) {
        storeBounds(min, max);
    }

    // Session-bound allocation with an explicit API selector, bounds stored in min_value / max_value.
    ShlClipParams(const ShlSession& session, csinn_api_enum api, float min, float max) : ShlParams<csinn_clip_params*>(session, api) {
        storeBounds(min, max);
    }

private:
    // Writes both bounds into the wrapped structure, min first and then max.
    void storeBounds(float lower, float upper) {
        auto clip = static_cast<csinn_clip_params*>(this->get());
        clip->min_value = lower;
        clip->max_value = upper;
    }
};

Expand Down
Loading