Skip to content

Commit 1559a65

Browse files
Revert "[AMDGPU] Handle memcpy()-like ops in LowerBufferFatPointers (#126621)"
This reverts commit 469757e. Multiple buildbot failures have been reported: #126621
1 parent d584d1f commit 1559a65

File tree

3 files changed

+24
-3156
lines changed

3 files changed

+24
-3156
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 24 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,16 @@
4545
//
4646
// This pass proceeds in three main phases:
4747
//
48-
// ## Rewriting loads and stores of p7 and memcpy()-like handling
48+
// ## Rewriting loads and stores of p7
4949
//
5050
// The first phase is to rewrite away all loads and stors of `ptr addrspace(7)`,
5151
// including aggregates containing such pointers, to ones that use `i160`. This
52-
// is handled by `StoreFatPtrsAsIntsAndExpandMemcpyVisitor` , which visits
53-
// loads, stores, and allocas and, if the loaded or stored type contains `ptr
54-
// addrspace(7)`, rewrites that type to one where the p7s are replaced by i160s,
55-
// copying other parts of aggregates as needed. In the case of a store, each
56-
// pointer is `ptrtoint`d to i160 before storing, and load integers are
57-
// `inttoptr`d back. This same transformation is applied to vectors of pointers.
52+
// is handled by `StoreFatPtrsAsIntsVisitor` , which visits loads, stores, and
53+
// allocas and, if the loaded or stored type contains `ptr addrspace(7)`,
54+
// rewrites that type to one where the p7s are replaced by i160s, copying other
55+
// parts of aggregates as needed. In the case of a store, each pointer is
56+
// `ptrtoint`d to i160 before storing, and load integers are `inttoptr`d back.
57+
// This same transformation is applied to vectors of pointers.
5858
//
5959
// Such a transformation allows the later phases of the pass to not need
6060
// to handle buffer fat pointers moving to and from memory, where we load
@@ -66,10 +66,6 @@
6666
// Atomics operations on `ptr addrspace(7)` values are not suppported, as the
6767
// hardware does not include a 160-bit atomic.
6868
//
69-
// In order to save on O(N) work and to ensure that the contents type
70-
// legalizer correctly splits up wide loads, also unconditionally lower
71-
// memcpy-like intrinsics into loops here.
72-
//
7369
// ## Buffer contents type legalization
7470
//
7571
// The underlying buffer intrinsics only support types up to 128 bits long,
@@ -235,24 +231,20 @@
235231
#include "llvm/IR/InstIterator.h"
236232
#include "llvm/IR/InstVisitor.h"
237233
#include "llvm/IR/Instructions.h"
238-
#include "llvm/IR/IntrinsicInst.h"
239234
#include "llvm/IR/Intrinsics.h"
240235
#include "llvm/IR/IntrinsicsAMDGPU.h"
241236
#include "llvm/IR/Metadata.h"
242237
#include "llvm/IR/Operator.h"
243238
#include "llvm/IR/PatternMatch.h"
244239
#include "llvm/IR/ReplaceConstant.h"
245-
#include "llvm/IR/ValueHandle.h"
246240
#include "llvm/InitializePasses.h"
247241
#include "llvm/Pass.h"
248-
#include "llvm/Support/AMDGPUAddrSpace.h"
249242
#include "llvm/Support/Alignment.h"
250243
#include "llvm/Support/AtomicOrdering.h"
251244
#include "llvm/Support/Debug.h"
252245
#include "llvm/Support/ErrorHandling.h"
253246
#include "llvm/Transforms/Utils/Cloning.h"
254247
#include "llvm/Transforms/Utils/Local.h"
255-
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
256248
#include "llvm/Transforms/Utils/ValueMapper.h"
257249

258250
#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
@@ -439,16 +431,14 @@ namespace {
439431
/// marshalling costs when reading or storing these values, but since placing
440432
/// such pointers into memory is an uncommon operation at best, we feel that
441433
/// this cost is acceptable for better performance in the common case.
442-
class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
443-
: public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor, bool> {
434+
class StoreFatPtrsAsIntsVisitor
435+
: public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> {
444436
BufferFatPtrToIntTypeMap *TypeMap;
445437

446438
ValueToValueMapTy ConvertedForStore;
447439

448440
IRBuilder<> IRB;
449441

450-
const TargetMachine *TM;
451-
452442
// Convert all the buffer fat pointers within the input value to inttegers
453443
// so that it can be stored in memory.
454444
Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name);
@@ -458,27 +448,20 @@ class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
458448
Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name);
459449

460450
public:
461-
StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,
462-
LLVMContext &Ctx,
463-
const TargetMachine *TM)
464-
: TypeMap(TypeMap), IRB(Ctx), TM(TM) {}
451+
StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
452+
: TypeMap(TypeMap), IRB(Ctx) {}
465453
bool processFunction(Function &F);
466454

467455
bool visitInstruction(Instruction &I) { return false; }
468456
bool visitAllocaInst(AllocaInst &I);
469457
bool visitLoadInst(LoadInst &LI);
470458
bool visitStoreInst(StoreInst &SI);
471459
bool visitGetElementPtrInst(GetElementPtrInst &I);
472-
473-
bool visitMemCpyInst(MemCpyInst &MCI);
474-
bool visitMemMoveInst(MemMoveInst &MMI);
475-
bool visitMemSetInst(MemSetInst &MSI);
476-
bool visitMemSetPatternInst(MemSetPatternInst &MSPI);
477460
};
478461
} // namespace
479462

480-
Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
481-
Value *V, Type *From, Type *To, const Twine &Name) {
463+
Value *StoreFatPtrsAsIntsVisitor::fatPtrsToInts(Value *V, Type *From, Type *To,
464+
const Twine &Name) {
482465
if (From == To)
483466
return V;
484467
ValueToValueMapTy::iterator Find = ConvertedForStore.find(V);
@@ -515,8 +498,8 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
515498
return Ret;
516499
}
517500

518-
Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
519-
Value *V, Type *From, Type *To, const Twine &Name) {
501+
Value *StoreFatPtrsAsIntsVisitor::intsToFatPtrs(Value *V, Type *From, Type *To,
502+
const Twine &Name) {
520503
if (From == To)
521504
return V;
522505
if (isBufferFatPtrOrVector(To)) {
@@ -548,25 +531,18 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
548531
return Ret;
549532
}
550533

551-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::processFunction(Function &F) {
534+
bool StoreFatPtrsAsIntsVisitor::processFunction(Function &F) {
552535
bool Changed = false;
553-
// Process memcpy-like instructions after the main iteration because they can
554-
// invalidate iterators.
555-
SmallVector<WeakTrackingVH> CanBecomeLoops;
536+
// The visitors will mutate GEPs and allocas, but will push loads and stores
537+
// to the worklist to avoid invalidation.
556538
for (Instruction &I : make_early_inc_range(instructions(F))) {
557-
if (isa<MemTransferInst, MemSetInst, MemSetPatternInst>(I))
558-
CanBecomeLoops.push_back(&I);
559-
else
560-
Changed |= visit(I);
561-
}
562-
for (WeakTrackingVH VH : make_early_inc_range(CanBecomeLoops)) {
563-
Changed |= visit(cast<Instruction>(VH));
539+
Changed |= visit(I);
564540
}
565541
ConvertedForStore.clear();
566542
return Changed;
567543
}
568544

569-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {
545+
bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(AllocaInst &I) {
570546
Type *Ty = I.getAllocatedType();
571547
Type *NewTy = TypeMap->remapType(Ty);
572548
if (Ty == NewTy)
@@ -575,8 +551,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {
575551
return true;
576552
}
577553

578-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
579-
GetElementPtrInst &I) {
554+
bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
580555
Type *Ty = I.getSourceElementType();
581556
Type *NewTy = TypeMap->remapType(Ty);
582557
if (Ty == NewTy)
@@ -588,7 +563,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
588563
return true;
589564
}
590565

591-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
566+
bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) {
592567
Type *Ty = LI.getType();
593568
Type *IntTy = TypeMap->remapType(Ty);
594569
if (Ty == IntTy)
@@ -606,7 +581,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
606581
return true;
607582
}
608583

609-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
584+
bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) {
610585
Value *V = SI.getValueOperand();
611586
Type *Ty = V->getType();
612587
Type *IntTy = TypeMap->remapType(Ty);
@@ -622,47 +597,6 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
622597
return true;
623598
}
624599

625-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst(
626-
MemCpyInst &MCI) {
627-
// TODO: Allow memcpy.p7.p3 as a synonym for the direct-to-LDS copy, which'll
628-
// need loop expansion here.
629-
if (MCI.getSourceAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER &&
630-
MCI.getDestAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER)
631-
return false;
632-
llvm::expandMemCpyAsLoop(&MCI,
633-
TM->getTargetTransformInfo(*MCI.getFunction()));
634-
MCI.eraseFromParent();
635-
return true;
636-
}
637-
638-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst(
639-
MemMoveInst &MMI) {
640-
if (MMI.getSourceAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER &&
641-
MMI.getDestAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER)
642-
return false;
643-
report_fatal_error(
644-
"memmove() on buffer descriptors is not implemented because pointer "
645-
"comparison on buffer descriptors isn't implemented\n");
646-
}
647-
648-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(
649-
MemSetInst &MSI) {
650-
if (MSI.getDestAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER)
651-
return false;
652-
llvm::expandMemSetAsLoop(&MSI);
653-
MSI.eraseFromParent();
654-
return true;
655-
}
656-
657-
bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst(
658-
MemSetPatternInst &MSPI) {
659-
if (MSPI.getDestAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER)
660-
return false;
661-
llvm::expandMemSetPatternAsLoop(&MSPI);
662-
MSPI.eraseFromParent();
663-
return true;
664-
}
665-
666600
namespace {
667601
/// Convert loads/stores of types that the buffer intrinsics can't handle into
668602
/// one ore more such loads/stores that consist of legal types.
@@ -1193,7 +1127,6 @@ bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
11931127

11941128
bool LegalizeBufferContentTypesVisitor::processFunction(Function &F) {
11951129
bool Changed = false;
1196-
// Note, memory transfer intrinsics won't
11971130
for (Instruction &I : make_early_inc_range(instructions(F))) {
11981131
Changed |= visit(I);
11991132
}
@@ -2151,12 +2084,6 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
21512084
case Intrinsic::invariant_end:
21522085
case Intrinsic::launder_invariant_group:
21532086
case Intrinsic::strip_invariant_group:
2154-
case Intrinsic::memcpy:
2155-
case Intrinsic::memcpy_inline:
2156-
case Intrinsic::memmove:
2157-
case Intrinsic::memset:
2158-
case Intrinsic::memset_inline:
2159-
case Intrinsic::experimental_memset_pattern:
21602087
return true;
21612088
}
21622089
}
@@ -2426,8 +2353,7 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
24262353
/*RemoveDeadConstants=*/false, /*IncludeSelf=*/true);
24272354
}
24282355

2429-
StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM, M.getContext(),
2430-
&TM);
2356+
StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext());
24312357
LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(DL,
24322358
M.getContext());
24332359
for (Function &F : M.functions()) {

0 commit comments

Comments
 (0)