Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
eca4879
Advertise fast `popcnt` support for SPIR-V targets.
AlexVlx Sep 24, 2024
82a9f72
Remove incorrect assertion.
AlexVlx Sep 24, 2024
25e9b3c
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 25, 2024
04779ea
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 25, 2024
8597c63
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 25, 2024
f68b396
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 25, 2024
fb29456
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 26, 2024
75e39fa
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 26, 2024
db154ee
SPIR-V doesn't handle integer bitwidths greater than 64.
AlexVlx Sep 26, 2024
ec6167f
Add opt->llc->SPIR-V test.
AlexVlx Sep 26, 2024
4f6da8b
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 26, 2024
5476fcd
Add missing whitespace.
AlexVlx Sep 27, 2024
f0112ff
No need to duplicate the same test under both AMDGPU and SPIRV.
AlexVlx Sep 27, 2024
57c75e9
Remove test as per review feedback.
AlexVlx Sep 27, 2024
f486701
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 27, 2024
178bc50
Merge branch 'main' of https://github.com/llvm/llvm-project into popc…
AlexVlx Sep 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
namespace llvm {
class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
using TTI = TargetTransformInfo;

friend BaseT;

Expand All @@ -37,6 +38,16 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
/// Creates the SPIR-V TTI implementation for function \p F.
///
/// The base class receives \p TM and the data layout of \p F; the subtarget
/// that \p TM provides for \p F is stored in ST, and that subtarget's target
/// lowering is stored in TLI.
///
/// \param TM Target machine used to look up the subtarget for \p F.
/// \param F  Function this TTI instance is created for.
explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
// NOTE(review): TLI is computed from ST, so ST must be declared before TLI in
// the class (member declarations are not visible here) - confirm.
TLI(ST->getTargetLowering()) {}

/// Reports how well population count is supported for an integer that is
/// \p TyWidth bits wide.
///
/// \param TyWidth Bit width of the integer type being queried.
/// \returns TTI::PSK_FastHardware for power-of-two widths of at most 64 bits,
///          TTI::PSK_Software for every other width.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
  // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
  // is reasonable to assume the Op is fast / preferable to the expanded loop.
  // Furthermore, this prevents information being lost if transforms are
  // applied to SPIR-V before lowering to a concrete target.
  //
  // NOTE(review): review feedback on this change was that TTI should probably
  // have a better default that checks whether the operation is legal, and that
  // it is not great that TargetLowering also has isCtpopFast.

  // Arbitrary bit-width INT is not core SPIR-V, and SPIR-V does not handle
  // integer bit widths greater than 64, so neither is advertised as fast.
  if (!isPowerOf2_32(TyWidth) || TyWidth > 64)
    return TTI::PSK_Software;
  return TTI::PSK_FastHardware;
}
};

} // namespace llvm
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt -passes=loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
; RUN: opt -passes=loop-idiom -mtriple=spirv64-amd-amdhsa -S < %s | FileCheck %s

; Mostly copied from x86 version.

Expand Down
2 changes: 2 additions & 0 deletions llvm/test/Transforms/LoopIdiom/SPIRV/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Mark every test in this directory as unsupported when "SPIRV" is not among
# the targets listed in the root lit configuration.
if "SPIRV" not in config.root.targets:
    config.unsupported = True
128 changes: 128 additions & 0 deletions llvm/test/Transforms/LoopIdiom/SPIRV/popcnt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
; RUN: opt -passes=loop-idiom -mtriple=spirv32-- -S < %s | FileCheck %s
; RUN: opt -passes=loop-idiom -mtriple=spirv64-- -S < %s | FileCheck %s

; Mostly copied from x86 version.

; To recognize this pattern:
;int popcount(unsigned long long a) {
; int c = 0;
; while (a) {
; c++;
; a &= a - 1;
; }
; return c;
;}
;

; CHECK-LABEL: @popcount_i64
; CHECK: entry
; CHECK: llvm.ctpop.i64
; CHECK: ret
; Counts the set bits of the i64 argument %a: each loop iteration clears the
; lowest set bit of the remaining value and bumps an i32 counter.
define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
entry:
; A zero input bypasses the loop; while.end then yields 0.
%tobool3 = icmp eq i64 %a, 0
br i1 %tobool3, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
; %c.05 is the running count, %a.addr.04 the bits still to be processed.
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
%a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
%inc = add nsw i32 %c.05, 1
; x & (x - 1): drops the lowest set bit of the remaining value.
%sub = add i64 %a.addr.04, -1
%and = and i64 %sub, %a.addr.04
; Leave the loop once no set bits remain.
%tobool = icmp eq i64 %and, 0
br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
; 0 when arriving straight from entry, otherwise the final count.
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
ret i32 %c.0.lcssa
}

; CHECK-LABEL: @popcount_i32
; CHECK: entry
; CHECK: llvm.ctpop.i32
; CHECK: ret
; Same bit-clearing loop as the i64 variant, but operating on an i32 argument:
; every iteration clears the lowest set bit of %a and increments the counter.
define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
entry:
; A zero input bypasses the loop; while.end then yields 0.
%tobool3 = icmp eq i32 %a, 0
br i1 %tobool3, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
; %c.05 is the running count, %a.addr.04 the bits still to be processed.
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
%a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
%inc = add nsw i32 %c.05, 1
; x & (x - 1): drops the lowest set bit of the remaining value.
%sub = add i32 %a.addr.04, -1
%and = and i32 %sub, %a.addr.04
; Leave the loop once no set bits remain.
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
; 0 when arriving straight from entry, otherwise the final count.
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
ret i32 %c.0.lcssa
}

; CHECK-LABEL: @popcount_i128
; CHECK: entry
; CHECK: llvm.ctpop.i128
; CHECK: ret
; Same bit-clearing loop as the narrower variants, but operating on an i128
; argument; the iteration count is still kept and returned as an i32.
define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
entry:
; A zero input bypasses the loop; while.end then yields 0.
%tobool3 = icmp eq i128 %a, 0
br i1 %tobool3, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
; %c.05 is the running count, %a.addr.04 the bits still to be processed.
%c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
%a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
%inc = add nsw i32 %c.05, 1
; x & (x - 1): drops the lowest set bit of the remaining value.
%sub = add i128 %a.addr.04, -1
%and = and i128 %sub, %a.addr.04
; Leave the loop once no set bits remain.
%tobool = icmp eq i128 %and, 0
br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
; 0 when arriving straight from entry, otherwise the final count.
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
ret i32 %c.0.lcssa
}

; To recognize this pattern:
;int popcount(unsigned long long a, int mydata1, int mydata2) {
; int c = 0;
; while (a) {
; c++;
; a &= a - 1;
; mydata1 *= c;
; mydata2 *= (int)a;
; }
; return c + mydata1 + mydata2;
;}

; CHECK-LABEL: @popcount2
; CHECK: entry
; CHECK: llvm.ctpop.i64
; CHECK: ret
; Bit-clearing count loop over the i64 argument %a that additionally carries
; two i32 accumulators through the loop: %mydata1 is multiplied by the updated
; count and %mydata2 by the truncated remaining value on every iteration. The
; function returns the sum of the count and both accumulators.
define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
entry:
; A zero input bypasses the loop; while.end then uses the incoming arguments.
%tobool9 = icmp eq i64 %a, 0
br i1 %tobool9, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
; Loop-carried state: the count, both accumulators and the remaining bits.
%c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
%mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
%mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
%a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
%inc = add nsw i32 %c.013, 1
; x & (x - 1): drops the lowest set bit of the remaining value.
%sub = add i64 %a.addr.010, -1
%and = and i64 %sub, %a.addr.010
; mydata1 *= c (using the already incremented count).
%mul = mul nsw i32 %inc, %mydata1.addr.011
; mydata2 *= (int)a (using the value after the bit was cleared).
%conv = trunc i64 %and to i32
%mul1 = mul nsw i32 %conv, %mydata2.addr.012
; Leave the loop once no set bits remain.
%tobool = icmp eq i64 %and, 0
br i1 %tobool, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
; Each phi picks the untouched input when arriving from entry, otherwise the
; value produced by the last loop iteration.
%c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
%mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
%mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
; Return c + mydata1 + mydata2.
%add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
%add2 = add i32 %add, %c.0.lcssa
ret i32 %add2
}