Skip to content

Commit aad980a

Browse files
committed
[AMDGPU] Enable atomic optimizer for 64 bit divergent values
1 parent 5bc37d0 commit aad980a

11 files changed

+14422
-3946
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,20 @@ bool AMDGPUAtomicOptimizerImpl::run(Function &F) {
178178
return Changed;
179179
}
180180

181+
/// Returns true when \p Ty is a type this file treats as optimizable: float,
/// double, or an integer whose bit width is exactly 32 or 64. Every other
/// type (including integers of any other width) yields false.
static bool shouldOptimizeForType(Type *Ty) {
  const Type::TypeID Kind = Ty->getTypeID();

  // Both supported floating-point kinds are accepted unconditionally.
  if (Kind == Type::FloatTyID || Kind == Type::DoubleTyID)
    return true;

  // Anything that is neither float/double nor an integer is rejected.
  if (Kind != Type::IntegerTyID)
    return false;

  // Integers qualify only at the two supported widths.
  const unsigned BitWidth = Ty->getIntegerBitWidth();
  return BitWidth == 32 || BitWidth == 64;
}
194+
181195
void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
182196
// Early exit for unhandled address space atomic instructions.
183197
switch (I.getPointerAddressSpace()) {
@@ -230,8 +244,7 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
230244
// value to the atomic calculation. We can only optimize divergent values if
231245
// we have DPP available on our subtarget, and the atomic operation is 32
232246
// or 64 bits.
233-
if (ValDivergent &&
234-
(!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
247+
if (ValDivergent && (!ST->hasDPP() || !shouldOptimizeForType(I.getType()))) {
235248
return;
236249
}
237250

@@ -313,8 +326,7 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) {
313326
// value to the atomic calculation. We can only optimize divergent values if
314327
// we have DPP available on our subtarget, and the atomic operation is 32
315328
// or 64 bits.
316-
if (ValDivergent &&
317-
(!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
329+
if (ValDivergent && (!ST->hasDPP() || !shouldOptimizeForType(I.getType()))) {
318330
return;
319331
}
320332

@@ -745,7 +757,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
745757
// of each active lane in the wavefront. This will be our new value
746758
// which we will provide to the atomic operation.
747759
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
748-
assert(TyBitWidth == 32);
760+
assert(TyBitWidth == 32 || TyBitWidth == 64);
749761
NewV = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readlane,
750762
{NewV, LastLaneIdx});
751763
}

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll

Lines changed: 1158 additions & 188 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 872 additions & 166 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Lines changed: 564 additions & 74 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll

Lines changed: 1138 additions & 194 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll

Lines changed: 486 additions & 18 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll

Lines changed: 414 additions & 18 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll

Lines changed: 2992 additions & 1062 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll

Lines changed: 1894 additions & 579 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll

Lines changed: 1894 additions & 579 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll

Lines changed: 2993 additions & 1063 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)