diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a4a0846df7af1..b1cbb048e0a25 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -44,6 +44,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -63,6 +64,7 @@
 #define DEBUG_TYPE "basicaa"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
@@ -251,31 +253,48 @@ void EarliestEscapeInfo::removeInstruction(Instruction *I) {
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// Represents zext(sext(trunc(V))).
+
+// VScale can be introduced both from GEP offsets and from uses of llvm.vscale.
+// This value represents vscale in a VariableGEPIndex, so the two can be
+// handled in similar ways.
+const Value *VScaleSentinel = reinterpret_cast<const Value *>(-1);
+
+/// Represents zext(sext(trunc(V))). For vscale, V is set to VScaleSentinel.
 struct CastedValue {
   const Value *V;
   unsigned ZExtBits = 0;
   unsigned SExtBits = 0;
   unsigned TruncBits = 0;
+  unsigned OriginalBits;
 
-  explicit CastedValue(const Value *V) : V(V) {}
+  explicit CastedValue(const Value *V) : V(V) {
+    OriginalBits = V->getType()->getPrimitiveSizeInBits();
+  }
   explicit CastedValue(const Value *V, unsigned ZExtBits, unsigned SExtBits,
                        unsigned TruncBits)
-      : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {}
+      : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {
+    OriginalBits = V->getType()->getPrimitiveSizeInBits();
+  }
 
   unsigned getBitWidth() const {
-    return V->getType()->getPrimitiveSizeInBits() - TruncBits + ZExtBits +
-           SExtBits;
+    return OriginalBits - TruncBits + ZExtBits + SExtBits;
   }
 
   CastedValue withValue(const Value *NewV) const {
     return CastedValue(NewV, ZExtBits, SExtBits, TruncBits);
   }
 
+  CastedValue withVScale(const Value *VScale) const {
+    CastedValue C(VScale, ZExtBits, SExtBits, TruncBits);
+    C.V = VScaleSentinel;
+    return C;
+  }
+
   /// Replace V with zext(NewV)
   CastedValue withZExtOfValue(const Value *NewV) const {
-    unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
-                        NewV->getType()->getPrimitiveSizeInBits();
+    assert(V != VScaleSentinel);
+    unsigned ExtendBy =
+        OriginalBits - NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
       return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
 
@@ -286,8 +305,9 @@ struct CastedValue {
 
   /// Replace V with sext(NewV)
   CastedValue withSExtOfValue(const Value *NewV) const {
-    unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
-                        NewV->getType()->getPrimitiveSizeInBits();
+    assert(V != VScaleSentinel);
+    unsigned ExtendBy =
+        OriginalBits - NewV->getType()->getPrimitiveSizeInBits();
     if (ExtendBy <= TruncBits)
       return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
 
@@ -297,8 +317,7 @@ struct CastedValue {
   }
 
   APInt evaluateWith(APInt N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
-           "Incompatible bit width");
+    assert(N.getBitWidth() == OriginalBits && "Incompatible bit width");
     if (TruncBits) N = N.trunc(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.sext(N.getBitWidth() + SExtBits);
     if (ZExtBits) N = N.zext(N.getBitWidth() + ZExtBits);
@@ -306,8 +325,7 @@ struct CastedValue {
   }
 
   ConstantRange evaluateWith(ConstantRange N) const {
-    assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
-           "Incompatible bit width");
+    assert(N.getBitWidth() == OriginalBits && "Incompatible bit width");
     if (TruncBits) N = N.truncate(N.getBitWidth() - TruncBits);
     if (SExtBits) N = N.signExtend(N.getBitWidth() + SExtBits);
     if (ZExtBits) N = N.zeroExtend(N.getBitWidth() + ZExtBits);
@@ -364,6 +382,12 @@ static LinearExpression GetLinearExpression(
   if (Depth == 6)
     return Val;
 
+  // If llvm.vscale is matched, return a linear expression with the
+  // VScaleSentinel, scale 1 and offset 0.
+  if (match(Val.V, m_VScale()))
+    return LinearExpression(Val.withVScale(Val.V), APInt(Val.getBitWidth(), 1),
+                            APInt(Val.getBitWidth(), 0), true);
+
   if (const ConstantInt *Const = dyn_cast<ConstantInt>(Val.V))
     return LinearExpression(Val, APInt(Val.getBitWidth(), 0),
                             Val.evaluateWith(Const->getValue()), true);
@@ -490,18 +514,17 @@ struct VariableGEPIndex {
     return Scale == Other.Scale;
   }
 
+  bool isVScale() const { return Val.V == VScaleSentinel; }
+
   void dump() const {
     print(dbgs());
     dbgs() << "\n";
   }
   void print(raw_ostream &OS) const {
-    OS << "(V=" << Val.V->getName()
-       << ", zextbits=" << Val.ZExtBits
-       << ", sextbits=" << Val.SExtBits
-       << ", truncbits=" << Val.TruncBits
-       << ", scale=" << Scale
-       << ", nsw=" << IsNSW
-       << ", negated=" << IsNegated << ")";
+    OS << "(V=" << (isVScale() ? "vscale" : Val.V->getName())
+       << ", zextbits=" << Val.ZExtBits << ", sextbits=" << Val.SExtBits
+       << ", truncbits=" << Val.TruncBits << ", scale=" << Scale
+       << ", nsw=" << IsNSW << ", negated=" << IsNegated << ")";
   }
 };
 }
@@ -633,27 +656,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
         continue;
       }
 
+      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
+      const bool ScalableGEP = isa<ScalableVectorType>(GTI.getIndexedType());
       // For an array/pointer, add the element offset, explicitly scaled.
+      // Skip adding to the constant offset if the GEP index is scalable; it is
+      // handled below as a variable offset.
       if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero())
          continue;
-
-        // Don't attempt to analyze GEPs if the scalable index is not zero.
-        TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-        if (AllocTypeSize.isScalable()) {
-          Decomposed.Base = V;
-          return Decomposed;
+        if (!ScalableGEP) {
+          Decomposed.Offset += AllocTypeSize.getFixedValue() *
+                               CIdx->getValue().sextOrTrunc(MaxIndexSize);
+          continue;
         }
-
-        Decomposed.Offset += AllocTypeSize.getFixedValue() *
-                             CIdx->getValue().sextOrTrunc(MaxIndexSize);
-        continue;
-      }
-
-      TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL);
-      if (AllocTypeSize.isScalable()) {
-        Decomposed.Base = V;
-        return Decomposed;
       }
 
       GepHasConstantOffset = false;
@@ -663,11 +678,33 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
       unsigned Width = Index->getType()->getIntegerBitWidth();
       unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
      unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
-      LinearExpression LE = GetLinearExpression(
-          CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+      // Scalable GEP decomposition.
+      // Allow a scalable GEP index to be decomposed in two cases:
+      // 1. getelementptr of a scalable type (e.g. <vscale x 4 x i32>) with a
+      //    constant index
+      // 2. An index whose expression has @llvm.vscale as a leaf
+      // In both cases the CastedValue of the VariableGEPIndex is essentially
+      // vscale; the VScaleSentinel is used to represent it in both cases.
+      LinearExpression LE(CastedValue(Index, 0, SExtBits, TruncBits));
+      if (ScalableGEP) {
+        if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+          LE = LinearExpression(LE.Val.withVScale(CIdx),
+                                LE.Val.evaluateWith(CIdx->getValue()),
+                                APInt(LE.Val.getBitWidth(), 0), true);
+          assert(LE.Offset.isZero() && "For Scalable GEP constant first index, "
+                                       "the offset of LE should be 0");
+        } else {
+          // If the index is not a constant, a single variable GEP index would
+          // contain two variables; bail out in this case.
+          Decomposed.Base = V;
+          return Decomposed;
+        }
+      } else {
+        LE = GetLinearExpression(LE.Val, DL, 0, AC, DT);
+      }
 
       // Scale by the type size.
-      unsigned TypeSize = AllocTypeSize.getFixedValue();
+      unsigned TypeSize = AllocTypeSize.getKnownMinValue();
       LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
       Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
       APInt Scale = LE.Scale.sext(MaxIndexSize);
@@ -1074,8 +1111,6 @@ AliasResult BasicAAResult::aliasGEP(
 
   // If an inbounds GEP would have to start from an out of bounds address
   // for the two to alias, then we can assume noalias.
-  // TODO: Remove !isScalable() once BasicAA fully support scalable location
-  // size
   if (*DecompGEP1.InBounds && DecompGEP1.VarIndices.empty() &&
       V2Size.hasValue() && !V2Size.isScalable() &&
       DecompGEP1.Offset.sge(V2Size.getValue()) &&
@@ -1110,10 +1145,6 @@ AliasResult BasicAAResult::aliasGEP(
     return BaseAlias;
   }
 
-  // Bail on analysing scalable LocationSize
-  if (V1Size.isScalable() || V2Size.isScalable())
-    return AliasResult::MayAlias;
-
   // If there is a constant difference between the pointers, but the difference
   // is less than the size of the associated memory object, then we know
   // that the objects are partially overlapping. If the difference is
@@ -1140,7 +1171,7 @@ AliasResult BasicAAResult::aliasGEP(
       Off = -Off;
     }
 
-    if (!VLeftSize.hasValue())
+    if (!VLeftSize.hasValue() || VLeftSize.isScalable())
      return AliasResult::MayAlias;
 
     const uint64_t LSize = VLeftSize.getValue();
@@ -1148,8 +1179,8 @@ AliasResult BasicAAResult::aliasGEP(
     // Conservatively drop processing if a phi was visited and/or offset is
     // too big.
     AliasResult AR = AliasResult::PartialAlias;
-    if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
-        (Off + VRightSize.getValue()).ule(LSize)) {
+    if (VRightSize.hasValue() && !VRightSize.isScalable() &&
+        Off.ule(INT32_MAX) && (Off + VRightSize.getValue()).ule(LSize)) {
       // Memory referenced by right pointer is nested. Save the offset in
       // cache. Note that originally offset estimated as GEP1-V2, but
       // AliasResult contains the shift that represents GEP1+Offset=V2.
@@ -1165,6 +1196,10 @@ AliasResult BasicAAResult::aliasGEP(
   if (!V1Size.hasValue() || !V2Size.hasValue())
     return AliasResult::MayAlias;
 
+  // Bail on scalable location sizes from this point onwards.
+  if (V1Size.isScalable() || V2Size.isScalable())
+    return AliasResult::MayAlias;
+
   APInt GCD;
   ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
   for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1180,10 +1215,14 @@ AliasResult BasicAAResult::aliasGEP(
     else
       GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
 
-    ConstantRange CR = computeConstantRange(Index.Val.V, /* ForSigned */ false,
-                                            true, &AC, Index.CxtI);
-    KnownBits Known =
-        computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+    ConstantRange CR =
+        Index.isVScale()
+            ? getVScaleRange(&F, OffsetRange.getBitWidth())
+            : computeConstantRange(Index.Val.V, /* ForSigned */ false, true,
+                                   &AC, Index.CxtI);
+    KnownBits Known = Index.isVScale() ? KnownBits(OffsetRange.getBitWidth())
+                                       : computeKnownBits(Index.Val.V, DL, 0,
+                                                          &AC, Index.CxtI, DT);
     CR = CR.intersectWith(
         ConstantRange::fromKnownBits(Known, /* Signed */ true),
         ConstantRange::Signed);
@@ -1231,7 +1270,7 @@ AliasResult BasicAAResult::aliasGEP(
   if (DecompGEP1.VarIndices.size() == 1) {
     // VarIndex = Scale*V.
     const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
-    if (Var.Val.TruncBits == 0 &&
+    if (Var.Val.TruncBits == 0 && !Var.isVScale() &&
         isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
       // Check if abs(V*Scale) >= abs(Scale) holds in the presence of
       // potentially wrapping math.
@@ -1239,11 +1278,10 @@ AliasResult BasicAAResult::aliasGEP(
       if (Var.IsNSW)
         return true;
 
-      int ValOrigBW = Var.Val.V->getType()->getPrimitiveSizeInBits();
       // If Scale is small enough so that abs(V*Scale) >= abs(Scale) holds.
       // The max value of abs(V) is 2^ValOrigBW - 1. Multiplying with a
       // constant smaller than 2^(bitwidth(Val) - ValOrigBW) won't wrap.
-      int MaxScaleValueBW = Var.Val.getBitWidth() - ValOrigBW;
+      int MaxScaleValueBW = Var.Val.getBitWidth() - Var.Val.OriginalBits;
      if (MaxScaleValueBW <= 0)
         return false;
       return Var.Scale.ule(
@@ -1264,6 +1302,7 @@ AliasResult BasicAAResult::aliasGEP(
     const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
     const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
     if (Var0.hasNegatedScaleOf(Var1) && Var0.Val.TruncBits == 0 &&
+        !Var0.isVScale() && !Var1.isVScale() &&
         Var0.Val.hasSameCastsAs(Var1.Val) && !AAQI.MayBeCrossIteration &&
         isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
                         DT))
@@ -1752,7 +1791,10 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
     bool Found = false;
     for (auto I : enumerate(DestGEP.VarIndices)) {
       VariableGEPIndex &Dest = I.value();
-      if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI) ||
+      if (Dest.isVScale() != Src.isVScale())
+        continue;
+      if ((!Dest.isVScale() &&
+           !isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V, AAQI)) ||
           !Dest.Val.hasSameCastsAs(Src.Val))
         continue;
 
@@ -1799,8 +1841,8 @@ bool BasicAAResult::constantOffsetHeuristic(const DecomposedGEP &GEP,
 
   const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
 
-  if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
-      !Var0.hasNegatedScaleOf(Var1) ||
+  if (Var0.isVScale() || Var1.isVScale() || Var0.Val.TruncBits != 0 ||
+      !Var0.Val.hasSameCastsAs(Var1.Val) || !Var0.hasNegatedScaleOf(Var1) ||
       Var0.Val.V->getType() != Var1.Val.V->getType())
     return false;
 
diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
index 8a83645ddaf9a..ee67f7c15fb41 100644
--- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll
+++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll
@@ -34,7 +34,8 @@ define void @ss2(ptr %p) {
   ret void
 }
 ; CHECK-LABEL: Alias sets for function 'son':
-; CHECK: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (ptr %g, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %g, LocationSize::precise(vscale x 16))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (ptr %p, LocationSize::precise(8))
 define void @son(ptr %p) {
   %g = getelementptr i8, ptr %p, i64 8
   store <vscale x 2 x i64> zeroinitializer, ptr %g, align 2
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 0d6d8fea392bb..8e90a1462722b 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -19,8 +19,7 @@ define void @gep_alloca_const_offset_1() {
 ; CHECK-LABEL: gep_alloca_const_offset_2
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep1
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %alloc, <vscale x 4 x i32>* %gep2
-; TODO: AliasResult for gep1,gep2 can be improved as MustAlias
-; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
 define void @gep_alloca_const_offset_2() {
   %alloc = alloca <vscale x 4 x i32>
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %alloc, i64 1
@@ -76,8 +75,7 @@ define void @gep_alloca_symbolic_offset(i64 %idx1, i64 %idx2) {
 ; CHECK-LABEL: gep_same_base_const_offset
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep2, <vscale x 4 x i32>* %p
-; TODO: AliasResult for gep1,gep2 can be improved as NoAlias
-; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2
+; CHECK-DAG: NoAlias: i32* %gep1, i32* %gep2
 define void @gep_same_base_const_offset(ptr %p) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 1
@@ -117,16 +115,46 @@ define void @gep_different_base_const_offset(ptr noalias %p1, ptr noalias %p2) {
   ret void
 }
 
+; getelementptr @llvm.vscale tests
+; CHECK-LABEL: gep_llvm_vscale_no_alias
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep3
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep2, <vscale x 4 x i32>* %gep3
+define void @gep_llvm_vscale_no_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %t2 = shl nuw nsw i64 %t1, 3
+  %gep1 = getelementptr i32, ptr %p, i64 %t2
+  %gep2 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %gep3 = getelementptr <vscale x 4 x i32>, ptr %p, i64 2
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  load <vscale x 4 x i32>, ptr %gep3
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+
+; CHECK-LABEL: gep_llvm_vscale_squared_may_alias
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %gep1, <vscale x 4 x i32>* %gep2
+define void @gep_llvm_vscale_squared_may_alias(ptr %p) {
+  %t1 = tail call i64 @llvm.vscale.i64()
+  %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 %t1
+  %gep2 = getelementptr i32, ptr %p, i64 1
+  load <vscale x 4 x i32>, ptr %gep1
+  load <vscale x 4 x i32>, ptr %gep2
+  ret void
+}
+
 ; getelementptr + bitcast
 ; CHECK-LABEL: gep_bitcast_1
 ; CHECK-DAG: MustAlias: i32* %p, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias: i32* %gep1, i32* %p
+; CHECK-DAG: NoAlias: i32* %gep1, i32* %p
 ; CHECK-DAG: MayAlias: i32* %gep2, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, i32* %gep2
 ; CHECK-DAG: NoAlias: i32* %gep2, i32* %p
-define void @gep_bitcast_1(ptr %p) {
+define void @gep_bitcast_1(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr i32, ptr %p, i64 4
   load <vscale x 4 x i32>, ptr %p
@@ -141,9 +169,9 @@ define void @gep_bitcast_1(ptr %p) {
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x i32>* %p
 ; CHECK-DAG: MayAlias: i32* %gep1, <vscale x 4 x float>* %p
 ; CHECK-DAG: MayAlias: float* %gep2, <vscale x 4 x i32>* %p
-; CHECK-DAG: MayAlias: i32* %gep1, float* %gep2
+; CHECK-DAG: MustAlias: i32* %gep1, float* %gep2
 ; CHECK-DAG: MayAlias: float* %gep2, <vscale x 4 x float>* %p
-define void @gep_bitcast_2(ptr %p) {
+define void @gep_bitcast_2(ptr %p) vscale_range(1,16) {
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
   %gep2 = getelementptr <vscale x 4 x float>, ptr %p, i64 1, i64 0
   load i32, ptr %gep1
@@ -153,6 +181,132 @@ define void @gep_bitcast_2(ptr %p) {
   ret void
 }
 
+; negative offset tests
+
+; CHECK-LABEL: gep_neg_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_neg_notscalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %m16, i64 1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_neg_scalable
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_neg_scalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @gep_pos_notscalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <4 x i32>, ptr %p
+  load <4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %m16
+  load <4 x i32>, ptr %vm16m16
+  load <4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: gep_pos_scalable
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MustAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @gep_pos_scalable(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+  %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+  %m16pv16 = getelementptr <vscale x 4 x i32>, ptr %vm16, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <vscale x 4 x i32>, ptr %vm16m16
+  load <vscale x 4 x i32>, ptr %m16pv16
+  ret void
+}
+
+; CHECK-LABEL: v1v2types
+; CHECK-DAG: MustAlias: <4 x i32>* %p, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %m16
+define void @v1v2types(ptr %p) vscale_range(1,16) {
+  %vm16 = getelementptr <vscale x 4 x i32>, ptr %p, i64 -1
+  %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+  load <vscale x 4 x i32>, ptr %p
+  load <4 x i32>, ptr %p
+  load <vscale x 4 x i32>, ptr %vm16
+  load <4 x i32>, ptr %vm16
+  load <vscale x 4 x i32>, ptr %m16
+  load <4 x i32>, ptr %m16
+  ret void
+}
+
 ; getelementptr recursion
 
 ; CHECK-LABEL: gep_recursion_level_1
@@ -174,8 +328,8 @@ define void @gep_recursion_level_1(ptr %a, ptr %p) {
 
 ; CHECK-LABEL: gep_recursion_level_1_bitcast
 ; CHECK-DAG: MustAlias: i32* %a, <vscale x 4 x i32>* %a
-; CHECK-DAG: MayAlias: i32* %a, i32* %gep
-; CHECK-DAG: MayAlias: i32* %a, i32* %gep_rec_1
+; CHECK-DAG: NoAlias: i32* %a, i32* %gep
+; CHECK-DAG: NoAlias: i32* %a, i32* %gep_rec_1
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %a, i32* %gep
 ; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %a, i32* %gep_rec_1
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_1
@@ -233,22 +387,22 @@ define void @gep_recursion_level_2(ptr %a, ptr %p) {
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_2
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_1, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_1, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_3
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_2, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_2, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_4
 ; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_3, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_3, i32* %gep_rec_6
 ; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_5
-; CHECK-DAG: NoAlias: i32* %gep_rec_4, i32* %gep_rec_6
-; CHECK-DAG: NoAlias: i32* %gep_rec_5, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_4, i32* %gep_rec_6
+; CHECK-DAG: MayAlias: i32* %gep_rec_5, i32* %gep_rec_6
 ; GEP max lookup depth was set to 6.
 define void @gep_recursion_max_lookup_depth_reached(ptr %a, ptr %p) {
   %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 2
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
index 71adaed8e5722..4a7f2acdbf8a9 100644
--- a/llvm/test/Transforms/GVN/vscale.ll
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -84,10 +84,7 @@ define i32 @load_clobber_load_gep3(ptr %p) {
 ; CHECK-LABEL: @load_clobber_load_gep3(
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P]], i64 1, i64 0
-; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
-; CHECK-NEXT: [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]]
 ; CHECK-NEXT: ret i32 [[ADD]]
 ;
   %gep1 = getelementptr <vscale x 4 x i32>, ptr %p, i64 1, i64 0
@@ -277,8 +274,7 @@ define void @redundant_load_elimination_2(i1 %c, ptr %p, ptr %q) {
 ; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4
 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
 ; CHECK: if.then:
-; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4
-; CHECK-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4
+; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4
 ; CHECK-NEXT: ret void
 ; CHECK: if.else:
 ; CHECK-NEXT: ret void
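
Illustrative sketch (not part of the patch; the helper name boundVScaleOffset is hypothetical): the aliasGEP changes above bound a vscale-based VariableGEPIndex through getVScaleRange(), which reads the caller's vscale_range attribute. Written as a standalone step, the bounding looks like this:

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  // Hypothetical helper: conservative signed range of Scale * vscale in
  // BitWidth bits, evaluated in the context of function F (BasicAA passes &F).
  static ConstantRange boundVScaleOffset(const Function &F, const APInt &Scale,
                                         unsigned BitWidth) {
    // getVScaleRange() derives the range from the vscale_range attribute and
    // returns the full range when the attribute is absent.
    ConstantRange VScaleCR = getVScaleRange(&F, BitWidth);
    // Multiply by the single-element range for Scale; ConstantRange::multiply
    // is a conservative over-approximation of the product.
    return VScaleCR.multiply(ConstantRange(Scale.sextOrTrunc(BitWidth)));
  }

For example, under vscale_range(1,16) with Scale = 16 (the known-minimum store size of <vscale x 4 x i32>), the result covers [16, 256]; that is the kind of bound the offset/range check in aliasGEP uses to prove NoAlias for fixed-size accesses such as i32* %gep1 vs i32* %p in gep_bitcast_1, while scalable location sizes still fall back to MayAlias.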