Skip to content

[SYCL] Add support for new FPGA loop attribute nofusion #2715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1849,6 +1849,18 @@ def SYCLIntelFPGASpeculatedIterations : Attr {
let Documentation = [SYCLIntelFPGASpeculatedIterationsAttrDocs];
}

// Attribute record for the C++11-style spelling [[intel::nofusion]].
// It lists the SYCLIsDevice and SYCLIsHost language options and points at
// SYCLIntelFPGANofusionAttrDocs for its user-facing documentation.
def SYCLIntelFPGANofusion : Attr {
let Spellings = [CXX11<"intel","nofusion">];
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// NOTE(review): presumably tells the generator that type transformation for
// this attribute is written by hand rather than generated -- confirm.
let HasCustomTypeTransform = 1;
// Additional member: a static getName() that returns the bare attribute name.
let AdditionalMembers = [{
static const char *getName() {
return "nofusion";
}
}];
let Documentation = [SYCLIntelFPGANofusionAttrDocs];
}

def IntelFPGALocalNonConstVar : SubsetSubject<Var,
[{S->hasLocalStorage() &&
S->getKind() != Decl::ImplicitParam &&
Expand Down
9 changes: 9 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2418,6 +2418,15 @@ used on the same loop in conjunction with disable_loop_pipelining.
}];
}

// Documentation record for the intel::nofusion attribute.
// NOTE(review): Category is DocCatVariable although the content describes an
// attribute that applies to a loop -- confirm this is the intended category.
def SYCLIntelFPGANofusionAttrDocs : Documentation {
let Category = DocCatVariable;
let Heading = "intel::nofusion";
let Content = [{
This attribute applies to a loop. Indicates that the annotated
loop should not be fused with any adjacent loop.
}];
}

def SYCLDeviceIndirectlyCallableDocs : Documentation {
let Category = DocCatFunction;
let Heading = "intel::device_indirectly_callable";
Expand Down
22 changes: 19 additions & 3 deletions clang/lib/CodeGen/CGLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,12 @@ MDNode *LoopInfo::createMetadata(
LoopProperties.push_back(MDNode::get(Ctx, Vals));
}

// nofusion attribute corresponds to 'llvm.loop.fusion.disable' metadata
if (Attrs.SYCLNofusionEnable) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.fusion.disable")};
LoopProperties.push_back(MDNode::get(Ctx, Vals));
}

if (Attrs.SYCLSpeculatedIterationsEnable) {
Metadata *Vals[] = {
MDString::get(Ctx, "llvm.loop.intel.speculated.iterations.count"),
Expand All @@ -604,7 +610,8 @@ LoopAttributes::LoopAttributes(bool IsParallel)
SYCLSpeculatedIterationsEnable(false),
SYCLSpeculatedIterationsNIterations(0), UnrollCount(0),
UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified),
PipelineDisabled(false), PipelineInitiationInterval(0) {}
PipelineDisabled(false), PipelineInitiationInterval(0),
SYCLNofusionEnable(false) {}

void LoopAttributes::clear() {
IsParallel = false;
Expand All @@ -631,6 +638,7 @@ void LoopAttributes::clear() {
DistributeEnable = LoopAttributes::Unspecified;
PipelineDisabled = false;
PipelineInitiationInterval = 0;
SYCLNofusionEnable = false;
}

LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
Expand Down Expand Up @@ -663,7 +671,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc &&
!EndLoc)
Attrs.SYCLNofusionEnable == false && !EndLoc)
return;

TempLoopID = MDNode::getTemporary(Header->getContext(), None);
Expand Down Expand Up @@ -970,6 +978,8 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
// For attribute speculated_iterations:
// n - 'llvm.loop.intel.speculated.iterations.count, i32 n' metadata will be
// emitted
// For attribute nofusion:
// 'llvm.loop.fusion.disable' metadata will be emitted
for (const auto *Attr : Attrs) {
const SYCLIntelFPGAIVDepAttr *IntelFPGAIVDep =
dyn_cast<SYCLIntelFPGAIVDepAttr>(Attr);
Expand All @@ -986,10 +996,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
dyn_cast<SYCLIntelFPGAMaxInterleavingAttr>(Attr);
const SYCLIntelFPGASpeculatedIterationsAttr *IntelFPGASpeculatedIterations =
dyn_cast<SYCLIntelFPGASpeculatedIterationsAttr>(Attr);
const SYCLIntelFPGANofusionAttr *IntelFPGANofusion =
dyn_cast<SYCLIntelFPGANofusionAttr>(Attr);

if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency &&
!IntelFPGALoopCoalesce && !IntelFPGADisableLoopPipelining &&
!IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations)
!IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations &&
!IntelFPGANofusion)
continue;

if (IntelFPGAIVDep)
Expand Down Expand Up @@ -1034,6 +1047,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
->getIntegerConstantExpr(Ctx)
->getSExtValue());
}

if (IntelFPGANofusion)
setSYCLNofusionEnable();
}

if (CGOpts.OptimizationLevel > 0)
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGLoopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ struct LoopAttributes {

/// Value for llvm.loop.pipeline.iicount metadata.
unsigned PipelineInitiationInterval;

/// Flag for llvm.loop.fusion.disable metadata.
bool SYCLNofusionEnable;
};

/// Information used when generating a structured loop.
Expand Down Expand Up @@ -405,6 +408,9 @@ class LoopInfoStack {
StagedAttrs.PipelineInitiationInterval = C;
}

/// Set the nofusion flag (StagedAttrs.SYCLNofusionEnable) to true for the
/// next loop pushed.
void setSYCLNofusionEnable() { StagedAttrs.SYCLNofusionEnable = true; }

private:
/// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Parse/ParseStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2580,7 +2580,8 @@ bool Parser::ParseSYCLLoopAttributes(ParsedAttributes &Attrs) {
Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAMaxInterleaving &&
Attrs.begin()->getKind() !=
ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations &&
Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint)
Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint &&
Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGANofusion)
return true;

bool IsIntelFPGAAttribute = (Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint);
Expand Down
17 changes: 17 additions & 0 deletions clang/lib/Sema/SemaStmtAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,19 @@ static Attr *handleIntelFPGAIVDepAttr(Sema &S, const ParsedAttr &A) {
NumArgs == 2 ? A.getArgAsExpr(1) : nullptr);
}

/// Create the semantic SYCLIntelFPGANofusionAttr for a parsed nofusion
/// attribute.
///
/// \returns the newly allocated attribute, or nullptr when SYCLIsHost is set
/// or when the attribute was written with any arguments (a
/// too-many-arguments warning is emitted in the latter case).
static Attr *handleIntelFPGANofusionAttr(Sema &S, const ParsedAttr &A) {
  // No attribute is created when the SYCLIsHost language option is set.
  if (S.LangOpts.SYCLIsHost)
    return nullptr;

  // nofusion takes no arguments; diagnose and ignore the attribute otherwise.
  if (A.getNumArgs() != 0) {
    S.Diag(A.getLoc(), diag::warn_attribute_too_many_arguments) << A << 0;
    return nullptr;
  }

  return new (S.Context) SYCLIntelFPGANofusionAttr(S.Context, A);
}

static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange) {
IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0);
Expand Down Expand Up @@ -675,6 +688,8 @@ static void CheckForIncompatibleSYCLLoopAttributes(
S, Attrs, Range);

CheckRedundantSYCLIntelFPGAIVDepAttrs(S, Attrs);
CheckForDuplicationSYCLLoopAttribute<SYCLIntelFPGANofusionAttr>(S, Attrs,
Range);
}

void CheckForIncompatibleUnrollHintAttributes(
Expand Down Expand Up @@ -803,6 +818,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
return handleLikely(S, St, A, Range);
case ParsedAttr::AT_Unlikely:
return handleUnlikely(S, St, A, Range);
case ParsedAttr::AT_SYCLIntelFPGANofusion:
return handleIntelFPGANofusionAttr(S, A);
default:
// if we're here, then we parsed a known attribute, but didn't recognize
// it as a statement attribute => it is declaration attribute
Expand Down
65 changes: 65 additions & 0 deletions clang/test/CodeGenSYCL/intel-fpga-nofusion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -fsycl -fsycl-is-device -internal-isystem %S/Inputs -emit-llvm %s -o - | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Applies [[intel::nofusion]] to while, do-while, for, and range-based for
// loops, and to nested loops where only the outer or only the inner loop is
// annotated. The directives inside each loop body match the branch that
// carries that loop's !llvm.loop metadata; the metadata nodes themselves are
// verified at the end of the file.
void nofusion() {
  int a[10];

  int i = 0;
  [[intel::nofusion]] while (i < 10) {
    // Same `%`-anchored label pattern as the other unconditional back-edges.
    // CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_1:.*]]
    a[i] += 2;
  }

  [[intel::nofusion]] do {
    // CHECK: br i1 %{{.*}}, !llvm.loop ![[MD_NF_2:.*]]
    a[i] += 3;
  } while (i < 10);

  [[intel::nofusion]] for (int i = 0; i < 10; ++i) {
    // CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_3:.*]]
    for (int j = 0; j < 10; ++j) {
      // CHECK-NOT: br label %{{.*}}, !llvm.loop !{{.*}}
      a[i] += a[j];
    }
  }

  int k;
  [[intel::nofusion]] for (auto k : a) {
    // CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_5:.*]]
    k += 4;
  }

  [[intel::nofusion]] for (int i = 0; i < 10; ++i) {
    // CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_6:.*]]
    a[i] += 5;
  }

  for (int i = 0; i < 10; ++i) {
    // CHECK-NOT: br label %{{.*}}, !llvm.loop !{{.*}}
    [[intel::nofusion]] for (int j = 0; j < 10; ++j) {
      // CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_8:.*]]
      a[i] += a[j];
    }
  }
}

// Submits a single_task kernel named kernel_function whose body calls
// nofusion(), so the annotated loops are reached from a kernel.
int main() {
q.submit([&](handler &h) {
h.single_task<class kernel_function>([]() { nofusion(); });
});
return 0;
}

// CHECK: ![[MD_NF_1]] = distinct !{![[MD_NF_1]], ![[MD_Nofusion:[0-9]+]]}
// CHECK: ![[MD_Nofusion]] = !{!"llvm.loop.fusion.disable"}
// CHECK: ![[MD_NF_2]] = distinct !{![[MD_NF_2]], ![[MD_Nofusion]]}
// CHECK: ![[MD_NF_3]] = distinct !{![[MD_NF_3]], ![[MD_Nofusion]]}
// CHECK: ![[MD_NF_5]] = distinct !{![[MD_NF_5]], ![[MD_Nofusion]]}
// CHECK: ![[MD_NF_6]] = distinct !{![[MD_NF_6]], ![[MD_Nofusion]]}
// CHECK: ![[MD_NF_8]] = distinct !{![[MD_NF_8]], ![[MD_Nofusion]]}
18 changes: 18 additions & 0 deletions clang/test/SemaSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ void foo() {
[[intel::max_interleaving(4)]] int i[10];
// expected-error@+1 {{intelfpga loop attributes must be applied to for, while, or do statements}}
[[intel::speculated_iterations(6)]] int j[10];
// expected-error@+1 {{intelfpga loop attributes must be applied to for, while, or do statements}}
[[intel::nofusion]] int k[10];
}

// Test for deprecated spelling of Intel FPGA loop attributes
Expand Down Expand Up @@ -114,6 +116,9 @@ void boo() {
// expected-warning@+1 {{'speculated_iterations' attribute takes no more than 1 argument - attribute ignored}}
[[intel::speculated_iterations(1, 2)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// expected-warning@+1 {{'nofusion' attribute takes no more than 0 arguments - attribute ignored}}
[[intel::nofusion(0)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for incorrect argument value for Intel FPGA loop attributes
Expand Down Expand Up @@ -187,6 +192,10 @@ void goo() {
// no diagnostics are expected
[[intel::ivdep(2, s.ptr)]] for (int i = 0; i != 10; ++i)
s.ptr[i] = 0;

// no diagnostics are expected
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for Intel FPGA loop attributes duplication
Expand Down Expand Up @@ -290,6 +299,11 @@ void zoo() {
// expected-note@+1 {{previous attribute is here}}
[[intel::ivdep(a, 3)]] for (int i = 0; i != 10; ++i)
a[i] = 0;

[[intel::nofusion]]
// expected-error@-1 {{duplicate Intel FPGA loop attribute 'nofusion'}}
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for Intel FPGA loop attributes compatibility
Expand Down Expand Up @@ -319,6 +333,10 @@ void loop_attrs_compatibility() {
[[intel::disable_loop_pipelining]]
[[intel::ivdep]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// no diagnostics are expected
[[intel::disable_loop_pipelining]]
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template<int A, int B, int C>
Expand Down
39 changes: 39 additions & 0 deletions clang/test/SemaSYCL/intel-fpga-nofusion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat -verify %s | FileCheck %s
// expected-no-diagnostics

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Applies [[intel::nofusion]] to a for loop, a while loop, and the inner loop
// of a nested for. Each case is preceded by directives that look for an
// AttributedStmt node followed by SYCLIntelFPGANofusionAttr in the AST dump.
void nofusion() {
int a1[10], a2[10];

// CHECK: AttributedStmt
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
[[intel::nofusion]] for (int p = 0; p < 10; ++p) {
a1[p] = a2[p] = 0;
}

// CHECK: AttributedStmt
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
int i = 0;
[[intel::nofusion]] while (i < 10) {
a1[i] += 3;
}

// CHECK: AttributedStmt
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
for (int i = 0; i < 10; ++i) {
[[intel::nofusion]] for (int j = 0; j < 10; ++j) {
a1[i] += a1[j];
}
}
}

// Submits a single_task kernel named kernel_function whose body calls
// nofusion(), so the annotated loops are reached from a kernel.
int main() {
q.submit([&](handler &h) {
h.single_task<class kernel_function>([]() { nofusion(); });
});
return 0;
}