diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a4a0846df7af1..b1cbb048e0a25 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -251,31 +253,48 @@ void EarliestEscapeInfo::removeInstruction(Instruction *I) {
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Represents zext(sext(trunc(V))).
+
+// VScale can be introduced both from GEP offsets and from uses of llvm.vscale.
+// This value represents vscale in a VariableGEPIndex, so the two can be
+// handled in similar ways.
+const Value *VScaleSentinel = reinterpret_cast<const Value *>(-1);
+
+/// Represents zext(sext(trunc(V))). For vscale, V is set to VScaleSentinel.
 struct CastedValue {
   const Value *V;
   unsigned ZExtBits = 0;
   unsigned SExtBits = 0;
   unsigned TruncBits = 0;
+  unsigned OriginalBits;
 
-  explicit CastedValue(const Value *V) : V(V) {}
+  explicit CastedValue(const Value *V) : V(V) {
+    OriginalBits = V->getType()->getPrimitiveSizeInBits();
+  }
   explicit CastedValue(const Value *V, unsigned ZExtBits, unsigned SExtBits,
                        unsigned TruncBits)
-      : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {}
+      : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {
+    OriginalBits = V->getType()->getPrimitiveSizeInBits();
+  }
 
   unsigned getBitWidth() const {
-    return V->getType()->getPrimitiveSizeInBits() - TruncBits + ZExtBits +
-           SExtBits;
+    return OriginalBits - TruncBits + ZExtBits + SExtBits;
   }
 
   CastedValue withValue(const Value *NewV) const {
     return CastedValue(NewV, ZExtBits, SExtBits, TruncBits);
   }
 
+  CastedValue withVScale(const Value *VScale) const {
+    CastedValue C(VScale, ZExtBits, SExtBits, TruncBits);
+    C.V = VScaleSentinel;
+    return C;
+  }
+
   /// Replace V with zext(NewV)
   CastedValue withZExtOfValue(const Value *NewV) const {
-    unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
-                        NewV->getType()->getPrimitiveSizeInBits();
+    assert(V != VScaleSentinel);
+    unsigned ExtendBy =
+        OriginalBits - NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
       return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
 
@@ -286,8 +305,9 @@ struct CastedValue {
 
   /// Replace V with sext(NewV)
   CastedValue withSExtOfValue(const Value *NewV) const {
-    unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
-                        NewV->getType()->getPrimitiveSizeInBits();
+    assert(V != VScaleSentinel);
+    unsigned ExtendBy =
+        OriginalBits - NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
       return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
 
@@ -297,8 +317,7 @@ struct CastedValue {
   }
 
   APInt evaluateWith(APInt N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
-           "Incompatible bit width");
+    assert(N.getBitWidth() == OriginalBits && "Incompatible bit width");
     if (TruncBits) N = N.trunc(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.sext(N.getBitWidth() + SExtBits);
     if (ZExtBits) N = N.zext(N.getBitWidth() + ZExtBits);
@@ -306,8 +325,7 @@ struct CastedValue {
   }
 
   ConstantRange evaluateWith(ConstantRange N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
-           "Incompatible bit width");
+    assert(N.getBitWidth() == OriginalBits && "Incompatible bit width");
     if (TruncBits) N = N.truncate(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.signExtend(N.getBitWidth() + SExtBits);
     if (ZExtBits) N = N.zeroExtend(N.getBitWidth() + ZExtBits);
@@ -364,6 +382,12 @@ static LinearExpression GetLinearExpression(
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, return a linear expression with the
+  // VScaleSentinel, scale 1 and offset 0.
+  if (match(Val.V, m_VScale()))
+    return LinearExpression(Val.withVScale(Val.V), APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -490,18 +514,17 @@ struct VariableGEPIndex {
     return Scale == Other.Scale;
   }
 
+  bool isVScale() const { return Val.V == VScaleSentinel; }
+
   void dump() const {
     print(dbgs());
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << (isVScale() ? "vscale" : Val.V->getName())
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -633,27 +656,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to the constant offset if the GEP index is scalable; it is
+      // handled below as a variable offset.
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
          continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -663,11 +678,33 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
      unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition.
+      // Allow a scalable GEP index to be decomposed in two cases:
+      // 1. getelementptr of a scalable type (e.g. <vscale x 4 x i32>) with a
+      //    constant index
+      // 2. An index whose expression has @llvm.vscale as a leaf
+      // In both cases the CastedValue of the VariableGEPIndex is essentially
+      // vscale; the VScaleSentinel is used to represent it in both cases.
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(LE.Val.withVScale(CIdx),
+                                LE.Val.evaluateWith(CIdx->getValue()),
+                                APInt(LE.Val.getBitWidth(), 0), true);
+          assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
+                                       "the offset of LE should be 0");
+        } else {
+          // If the index is not a constant, a single variable GEP index would
+          // contain two variables; bail out in this case.
+          Decomposed.Base = V;
+          return Decomposed;
+        }
+      } else {
+        LE = GetLinearExpression(LE.Val, DL, 0, AC, DT);
+      }
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
@@ -1074,8 +1111,6 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully support scalable location
-  // size
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
       V2Size.hasValue() && !V2Size.isScalable() &&
       DecompGEP1.Offset.sge(V2Size.getValue()) &&
@@ -1110,10 +1145,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping. If the difference is
@@ -1140,7 +1171,7 @@ AliasResult BasicAAResult::aliasGEP(
       Off = -Off;
     }
 
-    if (!VLeftSize.hasValue())
+    if (!VLeftSize.hasValue() || VLeftSize.isScalable())
      return AliasResult::MayAlias;
 
     const uint64_t LSize = VLeftSize.getValue();
@@ -1148,8 +1179,8 @@ AliasResult BasicAAResult::aliasGEP(
     // Conservatively drop processing if a phi was visited and/or offset is
     // too big.
     AliasResult AR = AliasResult::PartialAlias;
-    if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-        (Off + VRightSize.getValue()).ule(LSize)) {
+    if (VRightSize.hasValue() && !VRightSize.isScalable() &&
+        Off.ule(INT32_MAX) && (Off + VRightSize.getValue()).ule(LSize)) {
       // Memory referenced by right pointer is nested. Save the offset in
       // cache. Note that originally offset estimated as GEP1-V2, but
       // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1165,6 +1196,10 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
+  // Bail on scalable location sizes from this point onwards.
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   APInt GCD;
   ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
   for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1180,10 +1215,14 @@ AliasResult BasicAAResult::aliasGEP(
     else
       GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false,
-                                            true, &AC, Index.CxtI);
-    KnownBits Known =
-        computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+    ConstantRange CR =
+        Index.isVScale()
+            ? getVScaleRange(&F, OffsetRange.getBitWidth())
+            : computeConstantRange(Index.Val.V, /* ForSigned */ false, true,
+                                   &AC, Index.CxtI);
+    KnownBits Known = Index.isVScale() ? KnownBits(OffsetRange.getBitWidth())
+                                       : computeKnownBits(Index.Val.V, DL, 0,
+                                                          &AC, Index.CxtI, DT);
     CR = CR.intersectWith(
         ConstantRange::fromKnownBits(Known, /* Signed */ true),
         ConstantRange::Signed);
@@ -1231,7 +1270,7 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.VarIndices.size() == 1) {
     // VarIndex = Scale*V.
     const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
-    if (Var.Val.TruncBits == 0 &&
+    if (Var.Val.TruncBits == 0 && !Var.isVScale() &&
         isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
       // Check if abs(V*Scale) >= abs(Scale) holds in the presence of
       // potentially wrapping math.
@@ -1239,11 +1278,10 @@ AliasResult BasicAAResult::aliasGEP(
       if (Var.IsNSW)
         return true;
 
-      int ValOrigBW = Var.Val.V->getType()->getPrimitiveSizeInBits();
       // If Scale is small enough so that abs(V*Scale) >= abs(Scale) holds.
       // The max value of abs(V) is 2^ValOrigBW - 1. Multiplying with a
       // constant smaller than 2^(bitwidth(Val) - ValOrigBW) won't wrap.
-      int MaxScaleValueBW = Var.Val.getBitWidth() - ValOrigBW;
+      int MaxScaleValueBW = Var.Val.getBitWidth() - Var.Val.OriginalBits;
      if (MaxScaleValueBW <= 0)
         return false;
       return Var.Scale.ule(
@@ -1264,6 +1302,7 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
     const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
     if (Var0.hasNegatedScaleOf(Var1) && Var0.Val.TruncBits == 0 &&
+        !Var0.isVScale() && !Var1.isVScale() &&
         Var0.Val.hasSameCastsAs(Var1.Val) && !AAQI.MayBeCrossIteration &&
         isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
                         DT))
@@ -1752,7 +1791,10 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+      if (Dest.isVScale() != Src.isVScale())
+        continue;
+      if ((!Dest.isVScale() &&
+           !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
 
@@ -1799,8 +1841,8 @@ bool BasicAAResult::constantOffsetHeuristic(const DecomposedGEP &GEP,
 
   const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
 
-  if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
-      !Var0.hasNegatedScaleOf(Var1) ||
+  if (Var0.isVScale() || Var1.isVScale() || Var0.Val.TruncBits != 0 ||
+      !Var0.Val.hasSameCastsAs(Var1.Val) || !Var0.hasNegatedScaleOf(Var1) ||
       Var0.Val.V->getType() != Var1.Val.V->getType())
     return false;
 
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a..ee67f7c15fb41 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bb..8e90a1462722b 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -19,8 +19,7 @@ define void @gep_alloca_const_offset_1() {
 ; CHECK-LABEL: gep_alloca_const_offset_2
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -76,8 +75,7 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 ; CHECK-LABEL: gep_same_base_const_offset
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2
+; CHECK-DAG: NoAlias: i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -117,16 +115,46 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
+
 ; getelementptr + bitcast
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG: MustAlias: i32* %p, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias: i32* %gep1, i32* %p
+; CHECK-DAG: NoAlias: i32* %gep1, i32* %p
 ; CHECK-DAG: MayAlias: i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2
 ; CHECK-DAG: NoAlias: i32* %gep2, i32* %p
-define void @gep_bitcast_1(ptr %p) {
+define void @gep_bitcast_1(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr i32, ptr %p, i64 4
   load <vscale x 4 x i32>, ptr %p
@@ -141,9 +169,9 @@ define void @gep_bitcast_1(ptr %p) {
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x float>* %p
 ; CHECK-DAG: MayAlias: float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias: i32* %gep1, float* %gep2
+; CHECK-DAG: MustAlias: i32* %gep1, float* %gep2
 ; CHECK-DAG: MayAlias: float* %gep2, <vscale x 4 x float>* %p
-define void @gep_bitcast_2(ptr %p) {
+define void @gep_bitcast_2(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
   load i32, ptr %gep1
@@ -153,6 +181,132 @@ define void @gep_bitcast_2(ptr %p) {
   ret void
 }
 
+; negative offset tests
+
+; CHECK-LABEL: gep_neg_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_neg_notscalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %m16, i64 1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_neg_scalable
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_neg_scalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_pos_notscalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_scalable
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_pos_scalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: v1v2types
+; CHECK-DAG: MustAlias: <4 x i32>* %p, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %m16
+define void @v1v2types(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <4 x i32>, ptr %m16
+  ret void
+}
+
 ; getelementptr recursion
 
 ; CHECK-LABEL: gep_recursion_level_1
@@ -174,8 +328,8 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG: MustAlias: i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG: MayAlias: i32* %a, i32* %gep
-; CHECK-DAG: MayAlias: i32* %a, i32* %gep_rec_1
+; CHECK-DAG: NoAlias: i32* %a, i32* %gep
+; CHECK-DAG: NoAlias: i32* %a, i32* %gep_rec_1
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %a, i32* %gep
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_1
@@ -233,22 +387,22 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias: i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722..4a7f2acdbf8a9 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT: [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT: ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK: if.then:
-; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT: ret void
 ; CHECK: if.else:
 ; CHECK-NEXT: ret void
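
Illustrative sketch (not part of the patch; the helper name boundVScaleOffset is hypothetical): the aliasGEP changes above bound a vscale-based VariableGEPIndex through getVScaleRange(), which reads the caller's vscale_range attribute. Written as a standalone step, the bounding looks like this:

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  // Hypothetical helper: conservative signed range of Scale * vscale in
  // BitWidth bits, evaluated in the context of function F (BasicAA passes &F).
  static ConstantRange boundVScaleOffset(const Function &F, const APInt &Scale,
                                         unsigned BitWidth) {
    // getVScaleRange() derives the range from the vscale_range attribute and
    // returns the full range when the attribute is absent.
    ConstantRange VScaleCR = getVScaleRange(&F, BitWidth);
    // Multiply by the single-element range for Scale; ConstantRange::multiply
    // is a conservative over-approximation of the product.
    return VScaleCR.multiply(ConstantRange(Scale.sextOrTrunc(BitWidth)));
  }

For example, under vscale_range(1,16) with Scale = 16 (the known-minimum store size of <vscale x 4 x i32>), the result covers [16, 256]; that is the kind of bound the offset/range check in aliasGEP uses to prove NoAlias for fixed-size accesses such as i32* %gep1 vs i32* %p in gep_bitcast_1, while scalable location sizes still fall back to MayAlias.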