45
45
//
46
46
// This pass proceeds in three main phases:
47
47
//
48
- // ## Rewriting loads and stores of p7 and memcpy()-like handling
48
+ // ## Rewriting loads and stores of p7
49
49
//
50
50
// The first phase is to rewrite away all loads and stores of `ptr addrspace(7)`,
51
51
// including aggregates containing such pointers, to ones that use `i160`. This
52
- // is handled by `StoreFatPtrsAsIntsAndExpandMemcpyVisitor ` , which visits
53
- // loads, stores, and allocas and, if the loaded or stored type contains `ptr
54
- // addrspace(7)`, rewrites that type to one where the p7s are replaced by i160s,
55
- // copying other parts of aggregates as needed. In the case of a store, each
56
- // pointer is `ptrtoint`d to i160 before storing, and load integers are
57
- // `inttoptr`d back. This same transformation is applied to vectors of pointers.
52
+ // is handled by `StoreFatPtrsAsIntsVisitor`, which visits loads, stores, and
53
+ // allocas and, if the loaded or stored type contains `ptr addrspace(7)`,
54
+ // rewrites that type to one where the p7s are replaced by i160s, copying other
55
+ // parts of aggregates as needed. In the case of a store, each pointer is
56
+ // `ptrtoint`d to i160 before storing, and loaded integers are `inttoptr`d back.
57
+ // This same transformation is applied to vectors of pointers.
58
58
//
59
59
// Such a transformation allows the later phases of the pass to not need
60
60
// to handle buffer fat pointers moving to and from memory, where we load
66
66
// Atomic operations on `ptr addrspace(7)` values are not supported, as the
67
67
// hardware does not include a 160-bit atomic.
68
68
//
69
- // In order to save on O(N) work and to ensure that the contents type
70
- // legalizer correctly splits up wide loads, also unconditionally lower
71
- // memcpy-like intrinsics into loops here.
72
- //
73
69
// ## Buffer contents type legalization
74
70
//
75
71
// The underlying buffer intrinsics only support types up to 128 bits long,
235
231
#include " llvm/IR/InstIterator.h"
236
232
#include " llvm/IR/InstVisitor.h"
237
233
#include " llvm/IR/Instructions.h"
238
- #include " llvm/IR/IntrinsicInst.h"
239
234
#include " llvm/IR/Intrinsics.h"
240
235
#include " llvm/IR/IntrinsicsAMDGPU.h"
241
236
#include " llvm/IR/Metadata.h"
242
237
#include " llvm/IR/Operator.h"
243
238
#include " llvm/IR/PatternMatch.h"
244
239
#include " llvm/IR/ReplaceConstant.h"
245
- #include " llvm/IR/ValueHandle.h"
246
240
#include " llvm/InitializePasses.h"
247
241
#include " llvm/Pass.h"
248
- #include " llvm/Support/AMDGPUAddrSpace.h"
249
242
#include " llvm/Support/Alignment.h"
250
243
#include " llvm/Support/AtomicOrdering.h"
251
244
#include " llvm/Support/Debug.h"
252
245
#include " llvm/Support/ErrorHandling.h"
253
246
#include " llvm/Transforms/Utils/Cloning.h"
254
247
#include " llvm/Transforms/Utils/Local.h"
255
- #include " llvm/Transforms/Utils/LowerMemIntrinsics.h"
256
248
#include " llvm/Transforms/Utils/ValueMapper.h"
257
249
258
250
#define DEBUG_TYPE " amdgpu-lower-buffer-fat-pointers"
@@ -439,16 +431,14 @@ namespace {
439
431
// / marshalling costs when reading or storing these values, but since placing
440
432
// / such pointers into memory is an uncommon operation at best, we feel that
441
433
// / this cost is acceptable for better performance in the common case.
442
- class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
443
- : public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor , bool > {
434
+ class StoreFatPtrsAsIntsVisitor
435
+ : public InstVisitor<StoreFatPtrsAsIntsVisitor , bool > {
444
436
BufferFatPtrToIntTypeMap *TypeMap;
445
437
446
438
ValueToValueMapTy ConvertedForStore;
447
439
448
440
IRBuilder<> IRB;
449
441
450
- const TargetMachine *TM;
451
-
452
442
// Convert all the buffer fat pointers within the input value to integers
453
443
// so that it can be stored in memory.
454
444
Value *fatPtrsToInts (Value *V, Type *From, Type *To, const Twine &Name);
@@ -458,27 +448,20 @@ class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
458
448
Value *intsToFatPtrs (Value *V, Type *From, Type *To, const Twine &Name);
459
449
460
450
public:
461
- StoreFatPtrsAsIntsAndExpandMemcpyVisitor (BufferFatPtrToIntTypeMap *TypeMap,
462
- LLVMContext &Ctx,
463
- const TargetMachine *TM)
464
- : TypeMap(TypeMap), IRB(Ctx), TM(TM) {}
451
+ StoreFatPtrsAsIntsVisitor (BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
452
+ : TypeMap(TypeMap), IRB(Ctx) {}
465
453
bool processFunction (Function &F);
466
454
467
455
bool visitInstruction (Instruction &I) { return false ; }
468
456
bool visitAllocaInst (AllocaInst &I);
469
457
bool visitLoadInst (LoadInst &LI);
470
458
bool visitStoreInst (StoreInst &SI);
471
459
bool visitGetElementPtrInst (GetElementPtrInst &I);
472
-
473
- bool visitMemCpyInst (MemCpyInst &MCI);
474
- bool visitMemMoveInst (MemMoveInst &MMI);
475
- bool visitMemSetInst (MemSetInst &MSI);
476
- bool visitMemSetPatternInst (MemSetPatternInst &MSPI);
477
460
};
478
461
} // namespace
479
462
480
- Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::fatPtrsToInts (
481
- Value *V, Type *From, Type *To, const Twine &Name) {
463
+ Value *StoreFatPtrsAsIntsVisitor ::fatPtrsToInts (Value *V, Type *From, Type *To,
464
+ const Twine &Name) {
482
465
if (From == To)
483
466
return V;
484
467
ValueToValueMapTy::iterator Find = ConvertedForStore.find (V);
@@ -515,8 +498,8 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
515
498
return Ret;
516
499
}
517
500
518
- Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::intsToFatPtrs (
519
- Value *V, Type *From, Type *To, const Twine &Name) {
501
+ Value *StoreFatPtrsAsIntsVisitor ::intsToFatPtrs (Value *V, Type *From, Type *To,
502
+ const Twine &Name) {
520
503
if (From == To)
521
504
return V;
522
505
if (isBufferFatPtrOrVector (To)) {
@@ -548,25 +531,18 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
548
531
return Ret;
549
532
}
550
533
551
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::processFunction (Function &F) {
534
+ bool StoreFatPtrsAsIntsVisitor ::processFunction (Function &F) {
552
535
bool Changed = false ;
553
- // Process memcpy-like instructions after the main iteration because they can
554
- // invalidate iterators.
555
- SmallVector<WeakTrackingVH> CanBecomeLoops;
536
+ // The visitors will mutate GEPs and allocas, but will push loads and stores
537
+ // to the worklist to avoid invalidation.
556
538
for (Instruction &I : make_early_inc_range (instructions (F))) {
557
- if (isa<MemTransferInst, MemSetInst, MemSetPatternInst>(I))
558
- CanBecomeLoops.push_back (&I);
559
- else
560
- Changed |= visit (I);
561
- }
562
- for (WeakTrackingVH VH : make_early_inc_range (CanBecomeLoops)) {
563
- Changed |= visit (cast<Instruction>(VH));
539
+ Changed |= visit (I);
564
540
}
565
541
ConvertedForStore.clear ();
566
542
return Changed;
567
543
}
568
544
569
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitAllocaInst (AllocaInst &I) {
545
+ bool StoreFatPtrsAsIntsVisitor ::visitAllocaInst (AllocaInst &I) {
570
546
Type *Ty = I.getAllocatedType ();
571
547
Type *NewTy = TypeMap->remapType (Ty);
572
548
if (Ty == NewTy)
@@ -575,8 +551,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {
575
551
return true ;
576
552
}
577
553
578
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst (
579
- GetElementPtrInst &I) {
554
+ bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst (GetElementPtrInst &I) {
580
555
Type *Ty = I.getSourceElementType ();
581
556
Type *NewTy = TypeMap->remapType (Ty);
582
557
if (Ty == NewTy)
@@ -588,7 +563,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
588
563
return true ;
589
564
}
590
565
591
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitLoadInst (LoadInst &LI) {
566
+ bool StoreFatPtrsAsIntsVisitor ::visitLoadInst (LoadInst &LI) {
592
567
Type *Ty = LI.getType ();
593
568
Type *IntTy = TypeMap->remapType (Ty);
594
569
if (Ty == IntTy)
@@ -606,7 +581,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
606
581
return true ;
607
582
}
608
583
609
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitStoreInst (StoreInst &SI) {
584
+ bool StoreFatPtrsAsIntsVisitor ::visitStoreInst (StoreInst &SI) {
610
585
Value *V = SI.getValueOperand ();
611
586
Type *Ty = V->getType ();
612
587
Type *IntTy = TypeMap->remapType (Ty);
@@ -622,47 +597,6 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
622
597
return true ;
623
598
}
624
599
625
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst (
626
- MemCpyInst &MCI) {
627
- // TODO: Allow memcpy.p7.p3 as a synonym for the direct-to-LDS copy, which'll
628
- // need loop expansion here.
629
- if (MCI.getSourceAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER &&
630
- MCI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
631
- return false ;
632
- llvm::expandMemCpyAsLoop (&MCI,
633
- TM->getTargetTransformInfo (*MCI.getFunction ()));
634
- MCI.eraseFromParent ();
635
- return true ;
636
- }
637
-
638
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst (
639
- MemMoveInst &MMI) {
640
- if (MMI.getSourceAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER &&
641
- MMI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
642
- return false ;
643
- report_fatal_error (
644
- " memmove() on buffer descriptors is not implemented because pointer "
645
- " comparison on buffer descriptors isn't implemented\n " );
646
- }
647
-
648
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst (
649
- MemSetInst &MSI) {
650
- if (MSI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
651
- return false ;
652
- llvm::expandMemSetAsLoop (&MSI);
653
- MSI.eraseFromParent ();
654
- return true ;
655
- }
656
-
657
- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst (
658
- MemSetPatternInst &MSPI) {
659
- if (MSPI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
660
- return false ;
661
- llvm::expandMemSetPatternAsLoop (&MSPI);
662
- MSPI.eraseFromParent ();
663
- return true ;
664
- }
665
-
666
600
namespace {
667
601
// / Convert loads/stores of types that the buffer intrinsics can't handle into
668
602
// / one or more such loads/stores that consist of legal types.
@@ -1193,7 +1127,6 @@ bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
1193
1127
1194
1128
bool LegalizeBufferContentTypesVisitor::processFunction (Function &F) {
1195
1129
bool Changed = false ;
1196
- // Note, memory transfer intrinsics won't
1197
1130
for (Instruction &I : make_early_inc_range (instructions (F))) {
1198
1131
Changed |= visit (I);
1199
1132
}
@@ -2151,12 +2084,6 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
2151
2084
case Intrinsic::invariant_end:
2152
2085
case Intrinsic::launder_invariant_group:
2153
2086
case Intrinsic::strip_invariant_group:
2154
- case Intrinsic::memcpy:
2155
- case Intrinsic::memcpy_inline:
2156
- case Intrinsic::memmove:
2157
- case Intrinsic::memset:
2158
- case Intrinsic::memset_inline:
2159
- case Intrinsic::experimental_memset_pattern:
2160
2087
return true ;
2161
2088
}
2162
2089
}
@@ -2426,8 +2353,7 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
2426
2353
/* RemoveDeadConstants=*/ false , /* IncludeSelf=*/ true );
2427
2354
}
2428
2355
2429
- StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite (&IntTM, M.getContext (),
2430
- &TM);
2356
+ StoreFatPtrsAsIntsVisitor MemOpsRewrite (&IntTM, M.getContext ());
2431
2357
LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite (DL,
2432
2358
M.getContext ());
2433
2359
for (Function &F : M.functions ()) {
0 commit comments