[CGP][AArch64] Rebase the common base offset for better ISel #74046

vfdff · 2023-12-01T07:43:19Z

When all the large const offsets masked with the same value from bit-12 to bit-23 (low imm12 will be encoded in load/store instructions).
Fold

      add     x8, x0, #2031, lsl #12
      add     x8, x8, #960
      ldr     x9, [x8, x8]
      ldr     x8, [x8, #2056]

into

      add     x8, x0, #2031, lsl #12
      ldr     x9, [x8, #960]
      ldr     x8, [x8, #3016]

Fix the multi-use scenes for #71917

llvmbot · 2023-12-01T07:43:47Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-aarch64

Author: Allen (vfdff)

Changes

When all the large const offsets masked with the same value from bit-12 to bit-23.
Fold

      add     x8, x0, #<!-- -->2031, lsl #<!-- -->12
      add     x8, x8, #<!-- -->960
      ldr     x9, [x8, x8]
      ldr     x8, [x8, #<!-- -->2056]

into

      add     x8, x0, #<!-- -->2031, lsl #<!-- -->12
      ldr     x9, [x8, #<!-- -->960]
      ldr     x8, [x8, #<!-- -->3016]

Full diff: https://github.com/llvm/llvm-project/pull/74046.diff

7 Files Affected:

(modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+4)
(modified) llvm/include/llvm/CodeGen/TargetLowering.h (+7-1)
(modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+50-29)
(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+13)
(modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+3)
(modified) llvm/test/CodeGen/AArch64/arm64-addrmode.ll (+89)
(modified) llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll (+25-32)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index fd3410586e172a8..febe4ed108ef076 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
+  int64_t getPreferBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
+    return getTLI()->getPreferBaseOffset(MinOffset, MaxOffset);
+  }
+
   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                              Type *ScalarValTy) const {
     auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 77ee6b89ed8a34f..c3d9ea07f73065f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,8 +30,8 @@
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/LowLevelTypeUtils.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -2721,6 +2721,12 @@ class TargetLoweringBase {
                                      Type *Ty, unsigned AddrSpace,
                                      Instruction *I = nullptr) const;
 
+  /// Return the perfered common base offset.
+  virtual int64_t getPreferBaseOffset(int64_t MinOffset,
+                                      int64_t MaxOffset) const {
+    return 0;
+  }
+
   /// Return true if the specified immediate is legal icmp immediate, that is
   /// the target has icmp instructions which can compare a register against the
   /// immediate without having to materialize the immediate into a register.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 885d2d3ce24825b..e4e2dda1724f46c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
     int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
     Value *NewBaseGEP = nullptr;
 
+    auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
+                             GetElementPtrInst *GEP) {
+      LLVMContext &Ctx = GEP->getContext();
+      Type *PtrIdxTy = DL->getIndexType(GEP->getType());
+      Type *I8PtrTy =
+          PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
+      Type *I8Ty = Type::getInt8Ty(Ctx);
+
+      BasicBlock::iterator NewBaseInsertPt;
+      BasicBlock *NewBaseInsertBB;
+      if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+        // If the base of the struct is an instruction, the new base will be
+        // inserted close to it.
+        NewBaseInsertBB = BaseI->getParent();
+        if (isa<PHINode>(BaseI))
+          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+        else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+          NewBaseInsertBB =
+              SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
+          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+        } else
+          NewBaseInsertPt = std::next(BaseI->getIterator());
+      } else {
+        // If the current base is an argument or global value, the new base
+        // will be inserted to the entry block.
+        NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+        NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+      }
+      IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+      // Create a new base.
+      Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
+      NewBaseGEP = OldBase;
+      if (NewBaseGEP->getType() != I8PtrTy)
+        NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+      NewBaseGEP =
+          NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+      NewGEPBases.insert(NewBaseGEP);
+      return;
+    };
+
+    // Check whether all the offsets can be encoded with perfered common base.
+    if (int64_t PreferBase = TLI->getPreferBaseOffset(
+            LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
+      BaseOffset = PreferBase;
+      // Create a new base if the offset of the BaseGEP can be decoded with one
+      // instruction.
+      createNewBase(BaseOffset, OldBase, BaseGEP);
+    }
+
     auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
     while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
       GetElementPtrInst *GEP = LargeOffsetGEP->first;
@@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
       if (!NewBaseGEP) {
         // Create a new base if we don't have one yet.  Find the insertion
         // pointer for the new base first.
-        BasicBlock::iterator NewBaseInsertPt;
-        BasicBlock *NewBaseInsertBB;
-        if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
-          // If the base of the struct is an instruction, the new base will be
-          // inserted close to it.
-          NewBaseInsertBB = BaseI->getParent();
-          if (isa<PHINode>(BaseI))
-            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
-          else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
-            NewBaseInsertBB =
-                SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
-            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
-          } else
-            NewBaseInsertPt = std::next(BaseI->getIterator());
-        } else {
-          // If the current base is an argument or global value, the new base
-          // will be inserted to the entry block.
-          NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
-          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
-        }
-        IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
-        // Create a new base.
-        Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
-        NewBaseGEP = OldBase;
-        if (NewBaseGEP->getType() != I8PtrTy)
-          NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
-        NewBaseGEP =
-            NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
-        NewGEPBases.insert(NewBaseGEP);
+        createNewBase(BaseOffset, OldBase, GEP);
       }
 
       IRBuilder<> Builder(GEP);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb093a1613110e8..72f14680bda509e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15982,6 +15982,19 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                           AM.Scale);
 }
 
+// Check whether the 2 offsets belong to the same imm24 range, and their high
+// 12bits are same, then their high part can be decoded with the offset of add.
+int64_t AArch64TargetLowering::getPreferBaseOffset(int64_t MinOffset,
+                                                   int64_t MaxOffset) const {
+  int64_t HighPart = MinOffset & 0xfff000;
+  if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) {
+    // Rebase the value to an integer multiple of imm12.
+    return HighPart;
+  }
+
+  return 0;
+}
+
 bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
   // Consider splitting large offset of struct or array.
   return true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 25d7cb6d212d1f4..b85f03e872ae7de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -690,6 +690,9 @@ class AArch64TargetLowering : public TargetLowering {
                              unsigned AS,
                              Instruction *I = nullptr) const override;
 
+  int64_t getPreferBaseOffset(int64_t MinOffset,
+                              int64_t MaxOffset) const override;
+
   /// Return true if an FMA operation is faster than a pair of fmul and fadd
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 69c558d9d5599dc..cdd8df8d8adec40 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -209,3 +209,92 @@ define void @t17(i64 %a) {
   %3 = load volatile i64, ptr %2, align 8
   ret void
 }
+
+; https://gcc.godbolt.org/z/ErhhdxMv3
+define i32 @LdOffset_i8(ptr %a)  {
+; CHECK-LABEL: LdOffset_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #56952 // =0xde78
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %val to i32
+  ret i32 %conv
+}
+
+define i32 @LdOffset_i16(ptr %a)  {
+; CHECK-LABEL: LdOffset_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
+; CHECK-NEXT:    movk w8, #31, lsl #16
+; CHECK-NEXT:    ldrsh w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = sext i16 %val to i32
+  ret i32 %conv
+}
+
+define i32 @LdOffset_i32(ptr %a)  {
+; CHECK-LABEL: LdOffset_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #31200 // =0x79e0
+; CHECK-NEXT:    movk w8, #63, lsl #16
+; CHECK-NEXT:    ldr w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
+  %val = load i32, ptr %arrayidx, align 4
+  ret i32 %val
+}
+
+; https://gcc.godbolt.org/z/q6frE9ePe
+define i64 @LdOffset_i64_multi_offset(ptr %a) {
+; CHECK-LABEL: LdOffset_i64_multi_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #2031, lsl #12 // =8318976
+; CHECK-NEXT:    add x8, x8, #960
+; CHECK-NEXT:    ldr x9, [x8]
+; CHECK-NEXT:    ldr x8, [x8, #2056]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
+  %val0 = load i64, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 1040249
+  %val1 = load i64, ptr %arrayidx1, align 8
+  %add = add nsw i64 %val1, %val0
+  ret i64 %add
+}
+
+define i64 @LdOffset_i64_multi_offset_with_commmon_base(ptr %a) {
+; CHECK-LABEL: LdOffset_i64_multi_offset_with_commmon_base:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #507, lsl #12 // =2076672
+; CHECK-NEXT:    ldr x9, [x8, #26464]
+; CHECK-NEXT:    ldr x8, [x8, #26496]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
+  %b = getelementptr inbounds i16, ptr %a, i64 1038336
+  %arrayidx = getelementptr inbounds i64, ptr %b, i64 3308
+  %val0 = load i64, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 3312
+  %val1 = load i64, ptr %arrayidx1, align 8
+  %add = add nsw i64 %val1, %val0
+  ret i64 %add
+}
+
+; Negative test: the offset is odd
+define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a)  {
+; CHECK-LABEL: LdOffset_i16_odd_offset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #56953 // =0xde79
+; CHECK-NEXT:    movk w8, #15, lsl #16
+; CHECK-NEXT:    ldrsh w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039993
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = sext i16 %val to i32
+  ret i32 %conv
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 080b3dd75ee9a9d..097575ca86bccb1 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -6,18 +6,17 @@
 define void @test1(ptr %s, i32 %n) {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w10, #40000 // =0x9c40
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    add x9, x9, x10
-; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    mov w9, wzr
+; CHECK-NEXT:    add x8, x8, #9, lsl #12 // =36864
+; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.ge .LBB0_2
 ; CHECK-NEXT:  .LBB0_1: // %while_body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    str w8, [x9, #4]
-; CHECK-NEXT:    add w8, w8, #1
-; CHECK-NEXT:    str w8, [x9]
-; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    str w9, [x8, #3140]
+; CHECK-NEXT:    add w9, w9, #1
+; CHECK-NEXT:    str w9, [x8, #3136]
+; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.lt .LBB0_1
 ; CHECK-NEXT:  .LBB0_2: // %while_end
 ; CHECK-NEXT:    ret
@@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cbz x0, .LBB1_3
 ; CHECK-NEXT:  // %bb.1: // %while_cond.preheader
-; CHECK-NEXT:    mov w8, #40000 // =0x9c40
 ; CHECK-NEXT:    mov w9, wzr
-; CHECK-NEXT:    add x8, x0, x8
+; CHECK-NEXT:    add x8, x0, #9, lsl #12 // =36864
 ; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.ge .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: // %while_body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    str w9, [x8, #4]
+; CHECK-NEXT:    str w9, [x8, #3140]
 ; CHECK-NEXT:    add w9, w9, #1
-; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    str w9, [x8, #3136]
 ; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.lt .LBB1_2
 ; CHECK-NEXT:  .LBB1_3: // %while_end
@@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
 ; CHECK-NEXT:    csel x8, x1, x0, ne
 ; CHECK-NEXT:    cbz x8, .LBB2_3
 ; CHECK-NEXT:  // %bb.1: // %while_cond.preheader
-; CHECK-NEXT:    mov w10, #40000 // =0x9c40
 ; CHECK-NEXT:    mov w9, wzr
-; CHECK-NEXT:    add x8, x8, x10
+; CHECK-NEXT:    add x8, x8, #9, lsl #12 // =36864
 ; CHECK-NEXT:    cmp w9, w3
 ; CHECK-NEXT:    b.ge .LBB2_3
 ; CHECK-NEXT:  .LBB2_2: // %while_body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    str w9, [x8, #4]
+; CHECK-NEXT:    str w9, [x8, #3140]
 ; CHECK-NEXT:    add w9, w9, #1
-; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    str w9, [x8, #3136]
 ; CHECK-NEXT:    cmp w9, w3
 ; CHECK-NEXT:    b.lt .LBB2_2
 ; CHECK-NEXT:  .LBB2_3: // %while_end
@@ -141,17 +138,15 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
 ; CHECK-NEXT:    .cfi_personality 156, DW.ref.__FrameHandler
 ; CHECK-NEXT:    .cfi_lsda 28, .Lexception0
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w21, -24
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    mov w21, wzr
-; CHECK-NEXT:    mov w20, #40000 // =0x9c40
+; CHECK-NEXT:    mov w20, wzr
 ; CHECK-NEXT:  .LBB3_1: // %while_cond
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:  .Ltmp0:
@@ -159,23 +154,22 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
 ; CHECK-NEXT:  .Ltmp1:
 ; CHECK-NEXT:  // %bb.2: // %while_cond_x.split
 ; CHECK-NEXT:    // in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT:    add x8, x0, x20
-; CHECK-NEXT:    cmp w21, w19
-; CHECK-NEXT:    str wzr, [x8]
+; CHECK-NEXT:    add x8, x0, #9, lsl #12 // =36864
+; CHECK-NEXT:    cmp w20, w19
+; CHECK-NEXT:    str wzr, [x8, #3136]
 ; CHECK-NEXT:    b.ge .LBB3_4
 ; CHECK-NEXT:  // %bb.3: // %while_body
 ; CHECK-NEXT:    // in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT:    str w21, [x8, #4]
-; CHECK-NEXT:    add w21, w21, #1
-; CHECK-NEXT:    str w21, [x8]
+; CHECK-NEXT:    str w20, [x8, #3140]
+; CHECK-NEXT:    add w20, w20, #1
+; CHECK-NEXT:    str w20, [x8, #3136]
 ; CHECK-NEXT:    b .LBB3_1
 ; CHECK-NEXT:  .LBB3_4: // %while_end
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w19
 ; CHECK-NEXT:    .cfi_restore w20
-; CHECK-NEXT:    .cfi_restore w21
 ; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB3_5: // %cleanup
@@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) {
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    mov w9, wzr
 ; CHECK-NEXT:    add x8, x8, #19, lsl #12 // =77824
-; CHECK-NEXT:    add x8, x8, #2176
 ; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.ge .LBB4_2
 ; CHECK-NEXT:  .LBB4_1: // %while_body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    str w9, [x8, #4]
+; CHECK-NEXT:    str w9, [x8, #2180]
 ; CHECK-NEXT:    add w9, w9, #1
-; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    str w9, [x8, #2176]
 ; CHECK-NEXT:    cmp w9, w1
 ; CHECK-NEXT:    b.lt .LBB4_1
 ; CHECK-NEXT:  .LBB4_2: // %while_end

davemgreen

Thanks - this looks good from that I can see.

davemgreen · 2023-12-03T17:11:03Z

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+int64_t AArch64TargetLowering::getPreferBaseOffset(int64_t MinOffset,
+                                                   int64_t MaxOffset) const {
+  int64_t HighPart = MinOffset & 0xfff000;
+  if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) {


Maybe do HighPart = MinOffset & ~0xfffULL;, to try and capture more ranges.

apply your comment, thanks

davemgreen · 2023-12-03T17:13:09Z

llvm/lib/CodeGen/CodeGenPrepare.cpp

+      return;
+    };
+
+    // Check whether all the offsets can be encoded with perfered common base.


perfered -> prefered

davemgreen · 2023-12-03T17:18:55Z

llvm/include/llvm/CodeGen/TargetLowering.h

@@ -2721,6 +2721,12 @@ class TargetLoweringBase {
                                     Type *Ty, unsigned AddrSpace,
                                     Instruction *I = nullptr) const;

+  /// Return the perfered common base offset.
+  virtual int64_t getPreferBaseOffset(int64_t MinOffset,


Maybe call this something like getPreferredLargeGEPBaseOffset?

apply your comment, thanks

davemgreen · 2023-12-03T17:20:03Z

llvm/test/CodeGen/AArch64/arm64-addrmode.ll

+  ret i32 %val
+}
+
+; https://gcc.godbolt.org/z/q6frE9ePe


Remove the godbolt links

davemgreen

Thank you. Looks good to me.

When all the large const offsets masked with the same value from bit-12 to bit-23. Fold add x8, x0, llvm#2031, lsl llvm#12 add x8, x8, llvm#960 ldr x9, [x8, x8] ldr x8, [x8, llvm#2056] into add x8, x0, llvm#2031, lsl llvm#12 ldr x9, [x8, llvm#960] ldr x8, [x8, llvm#3016]

llvmbot added backend:AArch64 llvm:transforms labels Dec 1, 2023

vfdff requested review from nikic, efriedma-quic and davemgreen December 1, 2023 07:44

vfdff force-pushed the fix-547-point2-multiuse branch 2 times, most recently from 9f3cf99 to 0853acf Compare December 1, 2023 08:07

vfdff mentioned this pull request Dec 1, 2023

[AArch64] merge index address with large offset into base address #72187

Closed

davemgreen reviewed Dec 3, 2023

View reviewed changes

vfdff force-pushed the fix-547-point2-multiuse branch from 0853acf to d859e2c Compare December 4, 2023 11:52

vfdff requested a review from davemgreen December 4, 2023 11:54

davemgreen approved these changes Dec 4, 2023

View reviewed changes

vfdff added 2 commits December 5, 2023 09:01

[AArch64] Precommit tests for PR71917

6a8a562

vfdff force-pushed the fix-547-point2-multiuse branch from d859e2c to d6f4d52 Compare December 5, 2023 01:02

vfdff merged commit d6f4d52 into llvm:main Dec 5, 2023

vfdff deleted the fix-547-point2-multiuse branch December 5, 2023 07:09

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[CGP][AArch64] Rebase the common base offset for better ISel #74046

[CGP][AArch64] Rebase the common base offset for better ISel #74046

vfdff commented Dec 1, 2023 •

edited

Loading

llvmbot commented Dec 1, 2023 •

edited

Loading

davemgreen left a comment

davemgreen Dec 3, 2023

vfdff Dec 4, 2023

davemgreen Dec 3, 2023

vfdff Dec 4, 2023

davemgreen Dec 3, 2023

vfdff Dec 4, 2023

davemgreen Dec 3, 2023

vfdff Dec 4, 2023

davemgreen left a comment

[CGP][AArch64] Rebase the common base offset for better ISel #74046

[CGP][AArch64] Rebase the common base offset for better ISel #74046

Conversation

vfdff commented Dec 1, 2023 • edited Loading

llvmbot commented Dec 1, 2023 • edited Loading

davemgreen left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

davemgreen left a comment

Choose a reason for hiding this comment

vfdff commented Dec 1, 2023 •

edited

Loading

llvmbot commented Dec 1, 2023 •

edited

Loading