-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[LAA] Refine stride checks for SCEVs during dependence analysis. #99577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
1debdc1
e6a864c
d7ed03c
d224139
cdbf2ca
742547f
f7b6417
58f8c83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1458,12 +1458,11 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, | |||||||||||
return false; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// Check whether the access through \p Ptr has a constant stride. | ||||||||||||
std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE, | ||||||||||||
Type *AccessTy, Value *Ptr, | ||||||||||||
const Loop *Lp, | ||||||||||||
const DenseMap<Value *, const SCEV *> &StridesMap, | ||||||||||||
bool Assume, bool ShouldCheckWrap) { | ||||||||||||
static std::optional<const SCEV *> | ||||||||||||
getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, | ||||||||||||
const Loop *Lp, | ||||||||||||
const DenseMap<Value *, const SCEV *> &StridesMap, bool Assume, | ||||||||||||
bool ShouldCheckWrap) { | ||||||||||||
Type *Ty = Ptr->getType(); | ||||||||||||
assert(Ty->isPointerTy() && "Unexpected non-ptr"); | ||||||||||||
|
||||||||||||
|
@@ -1520,44 +1519,58 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE, | |||||||||||
if (Rem) | ||||||||||||
return std::nullopt; | ||||||||||||
|
||||||||||||
const SCEV *StrideSCEV = PSE.getSE()->getConstant(C->getType(), Stride); | ||||||||||||
if (!ShouldCheckWrap) | ||||||||||||
return Stride; | ||||||||||||
return StrideSCEV; | ||||||||||||
|
||||||||||||
// The address calculation must not wrap. Otherwise, a dependence could be | ||||||||||||
// inverted. | ||||||||||||
if (isNoWrapAddRec(Ptr, AR, PSE, Lp)) | ||||||||||||
return Stride; | ||||||||||||
return StrideSCEV; | ||||||||||||
|
||||||||||||
// An inbounds getelementptr that is a AddRec with a unit stride | ||||||||||||
// cannot wrap per definition. If it did, the result would be poison | ||||||||||||
// and any memory access dependent on it would be immediate UB | ||||||||||||
// when executed. | ||||||||||||
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); | ||||||||||||
GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1)) | ||||||||||||
return Stride; | ||||||||||||
return StrideSCEV; | ||||||||||||
|
||||||||||||
// If the null pointer is undefined, then a access sequence which would | ||||||||||||
// otherwise access it can be assumed not to unsigned wrap. Note that this | ||||||||||||
// assumes the object in memory is aligned to the natural alignment. | ||||||||||||
unsigned AddrSpace = Ty->getPointerAddressSpace(); | ||||||||||||
if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) && | ||||||||||||
(Stride == 1 || Stride == -1)) | ||||||||||||
return Stride; | ||||||||||||
return StrideSCEV; | ||||||||||||
|
||||||||||||
if (Assume) { | ||||||||||||
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); | ||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n" | ||||||||||||
<< "LAA: Pointer: " << *Ptr << "\n" | ||||||||||||
<< "LAA: SCEV: " << *AR << "\n" | ||||||||||||
<< "LAA: Added an overflow assumption\n"); | ||||||||||||
return Stride; | ||||||||||||
return StrideSCEV; | ||||||||||||
} | ||||||||||||
LLVM_DEBUG( | ||||||||||||
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " | ||||||||||||
<< *Ptr << " SCEV: " << *AR << "\n"); | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// Check whether the access through \p Ptr has a constant stride. | ||||||||||||
std::optional<int64_t> | ||||||||||||
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, | ||||||||||||
const Loop *Lp, | ||||||||||||
const DenseMap<Value *, const SCEV *> &StridesMap, | ||||||||||||
bool Assume, bool ShouldCheckWrap) { | ||||||||||||
std::optional<const SCEV *> StrideSCEV = getPtrStrideSCEV( | ||||||||||||
PSE, AccessTy, Ptr, Lp, StridesMap, Assume, ShouldCheckWrap); | ||||||||||||
if (StrideSCEV && isa<SCEVConstant>(*StrideSCEV)) | ||||||||||||
return cast<SCEVConstant>(*StrideSCEV)->getAPInt().getSExtValue(); | ||||||||||||
return std::nullopt; | ||||||||||||
} | ||||||||||||
|
||||||||||||
std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, | ||||||||||||
Type *ElemTyB, Value *PtrB, | ||||||||||||
const DataLayout &DL, | ||||||||||||
|
@@ -1899,23 +1912,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, | |||||||||||
return ScaledDist % Stride; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// Returns true if any of the underlying objects has a loop varying address, | ||||||||||||
/// i.e. may change in \p L. | ||||||||||||
static bool | ||||||||||||
isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects, | ||||||||||||
ScalarEvolution &SE, const Loop *L) { | ||||||||||||
return any_of(UnderlyingObjects, [&SE, L](const Value *UO) { | ||||||||||||
return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L); | ||||||||||||
}); | ||||||||||||
} | ||||||||||||
|
||||||||||||
std::variant<MemoryDepChecker::Dependence::DepType, | ||||||||||||
MemoryDepChecker::DepDistanceStrideAndSizeInfo> | ||||||||||||
MemoryDepChecker::getDependenceDistanceStrideAndSize( | ||||||||||||
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, | ||||||||||||
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, | ||||||||||||
const DenseMap<Value *, SmallVector<const Value *, 16>> | ||||||||||||
&UnderlyingObjects) { | ||||||||||||
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) { | ||||||||||||
auto &DL = InnermostLoop->getHeader()->getDataLayout(); | ||||||||||||
auto &SE = *PSE.getSE(); | ||||||||||||
auto [APtr, AIsWrite] = A; | ||||||||||||
|
@@ -1933,39 +1934,30 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |||||||||||
BPtr->getType()->getPointerAddressSpace()) | ||||||||||||
return MemoryDepChecker::Dependence::Unknown; | ||||||||||||
|
||||||||||||
int64_t StrideAPtr = | ||||||||||||
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true) | ||||||||||||
.value_or(0); | ||||||||||||
int64_t StrideBPtr = | ||||||||||||
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true) | ||||||||||||
.value_or(0); | ||||||||||||
std::optional<const SCEV *> StrideAPtr = getPtrStrideSCEV( | ||||||||||||
PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true); | ||||||||||||
std::optional<const SCEV *> StrideBPtr = getPtrStrideSCEV( | ||||||||||||
PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true); | ||||||||||||
|
||||||||||||
const SCEV *Src = PSE.getSCEV(APtr); | ||||||||||||
const SCEV *Sink = PSE.getSCEV(BPtr); | ||||||||||||
|
||||||||||||
// If the induction step is negative we have to invert source and sink of the | ||||||||||||
// dependence when measuring the distance between them. We should not swap | ||||||||||||
// AIsWrite with BIsWrite, as their uses expect them in program order. | ||||||||||||
if (StrideAPtr < 0) { | ||||||||||||
if (StrideAPtr && SE.isKnownNegative(*StrideAPtr)) { | ||||||||||||
std::swap(Src, Sink); | ||||||||||||
std::swap(AInst, BInst); | ||||||||||||
std::swap(StrideAPtr, StrideBPtr); | ||||||||||||
} | ||||||||||||
|
||||||||||||
const SCEV *Dist = SE.getMinusSCEV(Sink, Src); | ||||||||||||
|
||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink | ||||||||||||
<< "(Induction step: " << StrideAPtr << ")\n"); | ||||||||||||
<< "\n"); | ||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst | ||||||||||||
<< ": " << *Dist << "\n"); | ||||||||||||
|
||||||||||||
// Needs accesses where the addresses of the accessed underlying objects do | ||||||||||||
// not change within the loop. | ||||||||||||
if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE, | ||||||||||||
InnermostLoop) || | ||||||||||||
isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE, | ||||||||||||
InnermostLoop)) | ||||||||||||
return MemoryDepChecker::Dependence::IndirectUnsafe; | ||||||||||||
|
||||||||||||
// Check if we can prove that Sink only accesses memory after Src's end or | ||||||||||||
// vice versa. At the moment this is limited to cases where either source or | ||||||||||||
// sink are loop invariant to avoid compile-time increases. This is not | ||||||||||||
|
@@ -1987,11 +1979,47 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
// Need accesses with constant strides and the same direction. We don't want | ||||||||||||
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that | ||||||||||||
// could wrap in the address space. | ||||||||||||
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || | ||||||||||||
(StrideAPtr < 0 && StrideBPtr > 0)) { | ||||||||||||
// Need accesses with constant strides and the same direction for further | ||||||||||||
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and | ||||||||||||
// similar code or pointer arithmetic that could wrap in the address space. | ||||||||||||
// | ||||||||||||
// If Src or Sink are non-wrapping AddRecs, StrideAPtr and StrideBPtr contain | ||||||||||||
// a SCEV representing the stride of the SCEV. It may not be a known constant | ||||||||||||
// value though. | ||||||||||||
|
||||||||||||
// If either Src or Sink are not strided (i.e. not a non-wrapping AddRec), we | ||||||||||||
// cannot analyze the dependence further. | ||||||||||||
if (!StrideAPtr || !StrideBPtr) { | ||||||||||||
bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop); | ||||||||||||
bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop); | ||||||||||||
// If either Src or Sink are not loop invariant and not strided, the | ||||||||||||
// expression in the current iteration may overlap with any earlier or later | ||||||||||||
// iteration. This is not safe and we also cannot generate runtime checks to | ||||||||||||
// ensure safety. This includes expressions where an index is loaded in each | ||||||||||||
// iteration or wrapping AddRecs. | ||||||||||||
if ((!SrcInvariant && !StrideAPtr) || (!SinkInvariant && !StrideBPtr)) | ||||||||||||
return MemoryDepChecker::Dependence::IndirectUnsafe; | ||||||||||||
|
||||||||||||
// Otherwise both Src or Sink are either loop invariant or strided and we | ||||||||||||
// can generate a runtime check to disambiguate the accesses. | ||||||||||||
return MemoryDepChecker::Dependence::Unknown; | ||||||||||||
} | ||||||||||||
|
||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << **StrideAPtr | ||||||||||||
<< " Sink induction step: " << **StrideBPtr << "\n"); | ||||||||||||
// If either Src or Sink have a non-constant stride, we can generate a runtime | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
[nit] space There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be fixed in the latest version ,thanks! |
||||||||||||
// check to disambiguate them. | ||||||||||||
if ((!isa<SCEVConstant>(*StrideAPtr)) || (!isa<SCEVConstant>(*StrideBPtr))) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
However, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be simplified in the latest version |
||||||||||||
return MemoryDepChecker::Dependence::Unknown; | ||||||||||||
|
||||||||||||
// Both Src and Sink have a constant stride, check if they are in the same | ||||||||||||
// direction. | ||||||||||||
int64_t StrideAPtrInt = | ||||||||||||
cast<SCEVConstant>(*StrideAPtr)->getAPInt().getSExtValue(); | ||||||||||||
int64_t StrideBPtrInt = | ||||||||||||
cast<SCEVConstant>(*StrideBPtr)->getAPInt().getSExtValue(); | ||||||||||||
if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) || | ||||||||||||
(StrideAPtrInt < 0 && StrideBPtrInt > 0)) { | ||||||||||||
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only the check for opposite stride directions remain here. Please update the comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated in the latest version, thanks! |
||||||||||||
return MemoryDepChecker::Dependence::Unknown; | ||||||||||||
} | ||||||||||||
|
@@ -2001,22 +2029,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |||||||||||
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); | ||||||||||||
if (!HasSameSize) | ||||||||||||
TypeByteSize = 0; | ||||||||||||
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), | ||||||||||||
std::abs(StrideBPtr), TypeByteSize, | ||||||||||||
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt), | ||||||||||||
std::abs(StrideBPtrInt), TypeByteSize, | ||||||||||||
AIsWrite, BIsWrite); | ||||||||||||
} | ||||||||||||
|
||||||||||||
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | ||||||||||||
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, | ||||||||||||
unsigned BIdx, | ||||||||||||
const DenseMap<Value *, SmallVector<const Value *, 16>> | ||||||||||||
&UnderlyingObjects) { | ||||||||||||
MemoryDepChecker::Dependence::DepType | ||||||||||||
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, | ||||||||||||
const MemAccessInfo &B, unsigned BIdx) { | ||||||||||||
assert(AIdx < BIdx && "Must pass arguments in program order"); | ||||||||||||
|
||||||||||||
// Get the dependence distance, stride, type size and what access writes for | ||||||||||||
// the dependence between A and B. | ||||||||||||
auto Res = getDependenceDistanceStrideAndSize( | ||||||||||||
A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects); | ||||||||||||
auto Res = | ||||||||||||
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]); | ||||||||||||
if (std::holds_alternative<Dependence::DepType>(Res)) | ||||||||||||
return std::get<Dependence::DepType>(Res); | ||||||||||||
|
||||||||||||
|
@@ -2250,10 +2276,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( | |||||||||||
return Dependence::BackwardVectorizable; | ||||||||||||
} | ||||||||||||
|
||||||||||||
bool MemoryDepChecker::areDepsSafe( | ||||||||||||
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, | ||||||||||||
const DenseMap<Value *, SmallVector<const Value *, 16>> | ||||||||||||
&UnderlyingObjects) { | ||||||||||||
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, | ||||||||||||
MemAccessInfoList &CheckDeps) { | ||||||||||||
|
||||||||||||
MinDepDistBytes = -1; | ||||||||||||
SmallPtrSet<MemAccessInfo, 8> Visited; | ||||||||||||
|
@@ -2296,8 +2320,8 @@ bool MemoryDepChecker::areDepsSafe( | |||||||||||
if (*I1 > *I2) | ||||||||||||
std::swap(A, B); | ||||||||||||
|
||||||||||||
Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, | ||||||||||||
B.second, UnderlyingObjects); | ||||||||||||
Dependence::DepType Type = | ||||||||||||
isDependent(*A.first, A.second, *B.first, B.second); | ||||||||||||
mergeInStatus(Dependence::isSafeForVectorization(Type)); | ||||||||||||
|
||||||||||||
// Gather dependences unless we accumulated MaxDependences | ||||||||||||
|
@@ -2652,8 +2676,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, | |||||||||||
if (Accesses.isDependencyCheckNeeded()) { | ||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); | ||||||||||||
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, | ||||||||||||
Accesses.getDependenciesToCheck(), | ||||||||||||
Accesses.getUnderlyingObjects()); | ||||||||||||
Accesses.getDependenciesToCheck()); | ||||||||||||
|
||||||||||||
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { | ||||||||||||
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); | ||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC, this is the relevant change. An access must be strided or invariant for analysis. That's a good simplification over whatever
isLoopVariantIndirectAddress(UnderlyingObjects...)
does.I would inverse the condition, to check the supported cases and fallback to the unsupported one (we may want to support more cases in the future):
(NB: Pretty weird to call the more constrained case "Unknown")
Since invariant means stride 0, why not make
getPtrStride
return 0 in that case? Seems like confusing0
and "cannot determine stride" is the root cause of this problem.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I originally was worried about other users of getPtrStride which may not handled be all callers properly. But it looks like all all other callers use
.value_or(0)
so we should be fine to update it as suggested.Did this in the latest version, should be much simpler now, thanks!
Agreed, naming could be improved separately
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also,
IndirectUnsafe
may have been intended to only apply for theA[B[i]]
case (access ptr depends on index loaded from memory whose address itself is not invariant). #87189 may have demonstrated that trying to identify the cases that don't work is bound to fail (in this case forgetting other SCEVUnknown can be source of unpredictable access as well, for instance the return value of a function call), but instead one should identify the cases that are known to be supported (in this case: constant strides). That meansIndirectUnsafe
should be renamed as well, since it doesn't just apply toA[B[i]]
anymore.You probably intend to extend
getPtrStride
to return a SCEV so one can support non-constant strides, but that are invariant to the loop. It might be useful to pass APtr/BPtr as a SCEV only (Src
/Sink
) consistently as well instead callingPSE.getSCEV
on-demand.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will look into renaming separately, thanks!
Yep, will also work on that as follow-up, thanks!