Skip to content

Commit 80db3b0

Browse files
committed
[RISCV64] integrate SHL eltwise add op into OV
[RISCV64] add shlExecutor option for eltwise node [RISCV64] fix some errors with eltwise executor [RISCV64] add constructor def and vector initialization for ShlTensor [RISCV64] remove redundant debug print [RISCV64] skip failed tests [RISCV64] change way of invoking kernels [RISCV64] set shapes for ShlTensor before SHL add [RISCV64] simplify eltwise kernel invocation [RISCV64] integrate other eltwise ops [RISCV64] fix tests for some eltwise and activation ops [RISCV64] integrate PRelu and LeakyRelu op and fix related tests [RISCV64] integrate Maximum and Minimum op for the need of some tests [RISCV64] fix some tests [RISCV64] fix inaccurate problem with LeakyReLu op [RISCV64] fix some tests [RISCV64] enable debugging for riscv64
1 parent ffc135c commit 80db3b0

30 files changed

+594
-20
lines changed

src/plugins/intel_cpu/src/nodes/eltwise.cpp

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,7 +2266,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
22662266
if (outputPrecision == ov::element::bf16 || hasBF16)
22672267
OPENVINO_THROW("Eltwise node with name `", getName(), "` doesn't support BF16 precision on this target.");
22682268
}
2269-
#if defined(OV_CPU_WITH_ACL)
2269+
#if defined(OV_CPU_WITH_ACL) || defined(OV_CPU_WITH_SHL)
22702270
const bool useJit = false;
22712271
#endif
22722272
#elif defined(OPENVINO_ARCH_ARM64)
@@ -2318,6 +2318,41 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
23182318
outputPrecision = filterPrecision(outputPrecision, forcedPrec);
23192319
} else {
23202320
#endif
2321+
#if defined(OV_CPU_WITH_SHL)
2322+
auto filterPrecision = [&](const ov::element::Type& prc, const ov::element::Type& forcedPrec) {
2323+
if (isBitwise(algorithm)) {
2324+
if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), prc) == supportedPrecisions.end()) {
2325+
OPENVINO_THROW("Eltwise node with name `", getName(), "` doesn't support ", prc, " precision.");
2326+
}
2327+
return prc;
2328+
}
2329+
return forcedPrec;
2330+
};
2331+
2332+
const bool useShl = !useJit;
2333+
if (useShl) {
2334+
// Use the original output precision as a reference point, since some eltwise algorithms have non-float inputs (e.g. EltwiseSelect)
2335+
ov::element::Type forcedPrec = getOriginalOutputPrecisionAtPort(0) == ov::element::f16 ? ov::element::f16 : ov::element::f32;
2336+
// The SHL implementation supports only identical precisions on inputs/outputs, so they are all aligned to the highest one
2337+
if (ShlEltwiseExecutor::isEltwiseAlgorithmSupported(getAlgorithm())) {
2338+
for (size_t i = 0; i < getParentEdges().size(); i++) {
2339+
if (!getParentEdgeAt(i)->getParent()->isConstant()) {
2340+
if (getOriginalInputPrecisionAtPort(i).size() > forcedPrec.size()) {
2341+
forcedPrec = getOriginalInputPrecisionAtPort(i);
2342+
}
2343+
}
2344+
}
2345+
if (!forcedPrec.is_real()) {
2346+
forcedPrec = ov::element::f32;
2347+
}
2348+
}
2349+
2350+
for (size_t i = 0; i < inputPrecisions.size(); i++) {
2351+
inputPrecisions[i] = filterPrecision(inputPrecisions[i], forcedPrec);
2352+
}
2353+
outputPrecision = filterPrecision(outputPrecision, forcedPrec);
2354+
} else {
2355+
#endif
23212356
auto filterPrecision = [&](const ov::element::Type& prc) {
23222357
if (implType == EltwiseImplType::reference) {
23232358
if (isBitwise(algorithm)) {
@@ -2344,7 +2379,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
23442379
inputPrecisions[i] = filterPrecision(inputPrecisions[i]);
23452380
}
23462381
outputPrecision = filterPrecision(outputPrecision);
2347-
#if defined(OV_CPU_WITH_ACL)
2382+
#if defined(OV_CPU_WITH_ACL) || defined(OV_CPU_WITH_SHL)
23482383
}
23492384
#endif
23502385

@@ -2493,7 +2528,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
24932528
getInputShapeAtPort(i).getRank());
24942529
}
24952530

2496-
#if defined(OPENVINO_ARCH_ARM64)
2531+
#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_RISCV64)
24972532
bool isBlockedApplicable = (!useJit) && one_of(getOutputShapeAtPort(0).getRank(), 1u, 3u, 4u, 5u);
24982533
#else
24992534
bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1u, 3u, 4u, 5u);
@@ -2512,7 +2547,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
25122547
inputNum = getParentEdges().size();
25132548
currentInBlkDims.resize(inputNum);
25142549

2515-
#if defined (OV_CPU_WITH_ACL)
2550+
#if defined(OV_CPU_WITH_ACL)
25162551
if (useAcl || useJit) {
25172552
eltwiseAttrs = {algorithm, alpha, beta, gamma};
25182553

@@ -2539,6 +2574,24 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
25392574
}
25402575
#endif
25412576

2577+
#if defined(OV_CPU_WITH_SHL)
2578+
if (useShl || useJit) {
2579+
eltwiseAttrs = {algorithm, alpha, beta, gamma};
2580+
2581+
auto addDesc = [&initDesc, &useJit](std::vector<NodeDesc>& supportedPrimitiveDescriptors, const LayoutType layoutType) {
2582+
auto nodeDesc = initDesc(layoutType, !useJit, useJit);
2583+
if (nodeDesc.getExecutorFactory())
2584+
supportedPrimitiveDescriptors.emplace_back(nodeDesc);
2585+
};
2586+
2587+
addDesc(supportedPrimitiveDescriptors, Planar);
2588+
2589+
canUseShlExecutor = !supportedPrimitiveDescriptors.empty() && !useJit;
2590+
if (!supportedPrimitiveDescriptors.empty())
2591+
return;
2592+
}
2593+
#endif
2594+
25422595
if (isChannelsFirstApplicable)
25432596
supportedPrimitiveDescriptors.emplace_back(initDesc(ChannelsFirst));
25442597
if (isBlockedApplicable)
@@ -2570,7 +2623,7 @@ void Eltwise::createPrimitive() {
25702623
}
25712624

25722625
void Eltwise::prepareParams() {
2573-
if (canUseAclExecutor) {
2626+
if (canUseAclExecutor || canUseShlExecutor) {
25742627
std::vector<MemoryDescPtr> srcMemoryDescs;
25752628
for (size_t i = 0; i < getParentEdges().size(); i++) {
25762629
srcMemoryDescs.push_back(getSrcMemoryAtPort(i)->getDescPtr());
@@ -2579,8 +2632,13 @@ void Eltwise::prepareParams() {
25792632
dstMemoryDescs.push_back(getDstMemoryAtPort(0)->getDescPtr());
25802633

25812634
auto selectedPD = getSelectedPrimitiveDescriptor();
2582-
aclExecPtr = selectedPD->getExecutorFactoryAs<EltwiseExecutorFactory>()->makeExecutor(eltwiseAttrs, srcMemoryDescs, dstMemoryDescs, {});
2583-
selectedPD->setImplementationType(aclExecPtr->getImplType());
2635+
if (canUseAclExecutor) {
2636+
aclExecPtr = selectedPD->getExecutorFactoryAs<EltwiseExecutorFactory>()->makeExecutor(eltwiseAttrs, srcMemoryDescs, dstMemoryDescs, {});
2637+
selectedPD->setImplementationType(aclExecPtr->getImplType());
2638+
} else if (canUseShlExecutor) {
2639+
shlExecPtr = selectedPD->getExecutorFactoryAs<EltwiseExecutorFactory>()->makeExecutor(eltwiseAttrs, srcMemoryDescs, dstMemoryDescs, {});
2640+
selectedPD->setImplementationType(shlExecPtr->getImplType());
2641+
}
25842642

25852643
return;
25862644
}
@@ -2757,6 +2815,15 @@ void Eltwise::execute(dnnl::stream strm) {
27572815
dstMemory.push_back(getDstMemoryAtPort(0));
27582816

27592817
aclExecPtr->exec(srcMemory, dstMemory, fqDataPtrs.data());
2818+
} else if (shlExecPtr) {
2819+
std::vector<MemoryCPtr> srcMemory;
2820+
for (size_t i = 0; i < getParentEdges().size(); i++) {
2821+
srcMemory.push_back(getSrcMemoryAtPort(i));
2822+
}
2823+
std::vector<MemoryPtr> dstMemory;
2824+
dstMemory.push_back(getDstMemoryAtPort(0));
2825+
2826+
shlExecPtr->exec(srcMemory, dstMemory, fqDataPtrs.data());
27602827
} else {
27612828
OPENVINO_THROW("Can't execute eltwise node with name: ", getName(), ". Primitive isn't created");
27622829
}

src/plugins/intel_cpu/src/nodes/eltwise.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,10 @@ class Eltwise : public Node {
187187
void appendMemory(const std::vector<float> &data, MemoryPtr &memPtr, std::vector<const void*>& postOpsMem);
188188

189189
bool canUseAclExecutor = false;
190+
bool canUseShlExecutor = false;
190191
EltwiseAttrs eltwiseAttrs;
191192
std::shared_ptr<EltwiseExecutor> aclExecPtr = nullptr;
193+
std::shared_ptr<EltwiseExecutor> shlExecPtr = nullptr;
192194
};
193195

194196
class eltwise_precision_helper {

src/plugins/intel_cpu/src/nodes/executors/eltwise_list.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ namespace intel_cpu {
1010
const std::vector<EltwiseExecutorDesc>& getEltwiseExecutorsList() {
1111
static std::vector<EltwiseExecutorDesc> descs = {
1212
OV_CPU_INSTANCE_ACL(ExecutorType::Acl, std::make_shared<AclEltwiseExecutorBuilder>())
13+
OV_CPU_INSTANCE_SHL(ExecutorType::Shl, std::make_shared<ShlEltwiseExecutorBuilder>())
1314
};
1415

1516
return descs;

src/plugins/intel_cpu/src/nodes/executors/eltwise_list.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
#include "aarch64/jit_eltwise.hpp"
1212
#include "acl/acl_eltwise.hpp"
1313
#endif
14+
#if defined(OV_CPU_WITH_SHL)
15+
#include "shl/shl_eltwise.hpp"
16+
#endif
1417

1518
#include "onednn/iml_type_mapper.h"
1619
#include "common/primitive_cache.hpp"

src/plugins/intel_cpu/src/nodes/executors/shl/shl.hpp

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,47 @@ struct ShlTensor : public ShlStructure<csinn_tensor*> {
163163
OPENVINO_ASSERT(get()->dim_count < MAX_DIM, "Shl supports shapes with rank less or equal to 8");
164164
for (int i = 0; i < get()->dim_count; ++i)
165165
get()->dim[i] = static_cast<int32_t>(shape[i]);
166+
};
167+
};
168+
169+
// abstract base class (interface) for the different kinds of params
170+
struct IShlParams {
171+
public:
172+
virtual ~IShlParams() = default;
173+
virtual void reset(void* t) = 0;
174+
virtual void* get(bool allow_empty = false) const = 0;
175+
virtual void setAPI(csinn_api_enum api) = 0;
176+
};
177+
178+
template <typename T, typename traits = ShlStructureTraits<T>>
179+
struct ShlParams : public ShlStructure<T>, public IShlParams {
180+
ShlParams() {
181+
T params = static_cast<T>(csinn_alloc_params(sizeof(typename std::remove_pointer<T>::type), nullptr));
182+
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_params");
183+
this->reset(params);
184+
}
185+
186+
ShlParams(const ShlSession& session) {
187+
T params = static_cast<T>(csinn_alloc_params(sizeof(typename std::remove_pointer<T>::type), session.get()));
188+
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_params");
189+
this->reset(params);
190+
}
191+
192+
ShlParams(const ShlSession& session, csinn_api_enum api) : ShlParams(session) {
193+
setAPI(api);
194+
}
195+
196+
void reset(void* t) override {
197+
this->ShlStructure<T, traits>::reset(static_cast<T>(t));
198+
}
199+
200+
void* get(bool allow_empty = false) const override {
201+
return this->ShlStructure<T, traits>::get(allow_empty);
202+
}
203+
204+
void setAPI(csinn_api_enum api) override {
205+
auto params = static_cast<typename std::remove_pointer<T>::type*>(this->get());
206+
params->base.api = api;
166207
}
167208
};
168209

@@ -172,18 +213,69 @@ struct ShlStructureTraits<csinn_fc_params*> {
172213
return csinn_free_params(p);
173214
}
174215
};
175-
struct ShlFCParams : public ShlStructure<csinn_fc_params*> {
176-
ShlFCParams() {
177-
csinn_fc_params* params = static_cast<csinn_fc_params*>(csinn_alloc_params(sizeof(csinn_fc_params), nullptr));
178-
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_fc_params");
179-
reset(params);
216+
struct ShlFCParams : public ShlParams<csinn_fc_params*> {
217+
using ShlParams<csinn_fc_params*>::ShlParams;
218+
};
219+
220+
template <>
221+
struct ShlStructureTraits<csinn_diso_params*> {
222+
static void destructor(csinn_diso_params* p) {
223+
return csinn_free_params(p);
180224
}
225+
};
226+
struct ShlDisoParams : public ShlParams<csinn_diso_params*> {
227+
using ShlParams<csinn_diso_params*>::ShlParams;
228+
};
181229

182-
ShlFCParams(const ShlSession& session, csinn_api_enum api) {
183-
csinn_fc_params* params = static_cast<csinn_fc_params*>(csinn_alloc_params(sizeof(csinn_fc_params), session.get()));
184-
OPENVINO_ASSERT(params != nullptr, "Failed to create csinn_fc_params");
185-
params->base.api = api;
186-
reset(params);
230+
template <>
231+
struct ShlStructureTraits<csinn_siso_params*> {
232+
static void destructor(csinn_siso_params* p) {
233+
return csinn_free_params(p);
234+
}
235+
};
236+
struct ShlSisoParams : public ShlParams<csinn_siso_params*> {
237+
using ShlParams<csinn_siso_params*>::ShlParams;
238+
};
239+
240+
template <>
241+
struct ShlStructureTraits<csinn_relu_params*> {
242+
static void destructor(csinn_relu_params* p) {
243+
return csinn_free_params(p);
244+
}
245+
};
246+
struct ShlReluParams : public ShlParams<csinn_relu_params*> {
247+
using ShlParams<csinn_relu_params*>::ShlParams;
248+
249+
ShlReluParams(float alpha) : ShlParams<csinn_relu_params*>() {
250+
auto params = static_cast<csinn_relu_params*>(this->get());
251+
params->n = alpha;
252+
}
253+
254+
ShlReluParams(const ShlSession& session, float alpha) : ShlParams<csinn_relu_params*>(session) {
255+
auto params = static_cast<csinn_relu_params*>(this->get());
256+
params->n = alpha;
257+
}
258+
259+
ShlReluParams(const ShlSession& session, csinn_api_enum api, float alpha) : ShlParams<csinn_relu_params*>(session, api) {
260+
auto params = static_cast<csinn_relu_params*>(this->get());
261+
params->n = alpha;
262+
}
263+
};
264+
265+
template <>
266+
struct ShlStructureTraits<csinn_prelu_params*> {
267+
static void destructor(csinn_prelu_params* p) {
268+
return csinn_free_params(p);
269+
}
270+
};
271+
struct ShlPReluParams : public ShlParams<csinn_prelu_params*> {
272+
using ShlParams<csinn_prelu_params*>::ShlParams;
273+
};
274+
275+
template <>
276+
struct ShlStructureTraits<csinn_conv2d_params*> {
277+
static void destructor(csinn_conv2d_params* p) {
278+
return csinn_free_params(p);
187279
}
188280
};
189281

0 commit comments

Comments
 (0)