-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[CGP][AArch64] Rebase the common base offset for better ISel #74046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-aarch64 Author: Allen (vfdff) Changes: When all the large constant offsets are masked with the same value from bit 12 to bit 23, fold
into
Full diff: https://github.com/llvm/llvm-project/pull/74046.diff 7 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index fd3410586e172a8..febe4ed108ef076 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
+ int64_t getPreferBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
+ return getTLI()->getPreferBaseOffset(MinOffset, MaxOffset);
+ }
+
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 77ee6b89ed8a34f..c3d9ea07f73065f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,8 +30,8 @@
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -2721,6 +2721,12 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;
+ /// Return the perfered common base offset.
+ virtual int64_t getPreferBaseOffset(int64_t MinOffset,
+ int64_t MaxOffset) const {
+ return 0;
+ }
+
/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 885d2d3ce24825b..e4e2dda1724f46c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
Value *NewBaseGEP = nullptr;
+ auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
+ GetElementPtrInst *GEP) {
+ LLVMContext &Ctx = GEP->getContext();
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
+ Type *I8PtrTy =
+ PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Type::getInt8Ty(Ctx);
+
+ BasicBlock::iterator NewBaseInsertPt;
+ BasicBlock *NewBaseInsertBB;
+ if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+ // If the base of the struct is an instruction, the new base will be
+ // inserted close to it.
+ NewBaseInsertBB = BaseI->getParent();
+ if (isa<PHINode>(BaseI))
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+ NewBaseInsertBB =
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ } else
+ NewBaseInsertPt = std::next(BaseI->getIterator());
+ } else {
+ // If the current base is an argument or global value, the new base
+ // will be inserted to the entry block.
+ NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ }
+ IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+ // Create a new base.
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
+ NewBaseGEP = OldBase;
+ if (NewBaseGEP->getType() != I8PtrTy)
+ NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+ NewBaseGEP =
+ NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+ NewGEPBases.insert(NewBaseGEP);
+ return;
+ };
+
+ // Check whether all the offsets can be encoded with perfered common base.
+ if (int64_t PreferBase = TLI->getPreferBaseOffset(
+ LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
+ BaseOffset = PreferBase;
+ // Create a new base if the offset of the BaseGEP can be decoded with one
+ // instruction.
+ createNewBase(BaseOffset, OldBase, BaseGEP);
+ }
+
auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
GetElementPtrInst *GEP = LargeOffsetGEP->first;
@@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
if (!NewBaseGEP) {
// Create a new base if we don't have one yet. Find the insertion
// pointer for the new base first.
- BasicBlock::iterator NewBaseInsertPt;
- BasicBlock *NewBaseInsertBB;
- if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
- // If the base of the struct is an instruction, the new base will be
- // inserted close to it.
- NewBaseInsertBB = BaseI->getParent();
- if (isa<PHINode>(BaseI))
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
- NewBaseInsertBB =
- SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- } else
- NewBaseInsertPt = std::next(BaseI->getIterator());
- } else {
- // If the current base is an argument or global value, the new base
- // will be inserted to the entry block.
- NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
- NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
- }
- IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
- // Create a new base.
- Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
- NewBaseGEP = OldBase;
- if (NewBaseGEP->getType() != I8PtrTy)
- NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
- NewBaseGEP =
- NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
- NewGEPBases.insert(NewBaseGEP);
+ createNewBase(BaseOffset, OldBase, GEP);
}
IRBuilder<> Builder(GEP);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cb093a1613110e8..72f14680bda509e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15982,6 +15982,19 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
AM.Scale);
}
+// Check whether the 2 offsets belong to the same imm24 range, and their high
+// 12bits are same, then their high part can be decoded with the offset of add.
+int64_t AArch64TargetLowering::getPreferBaseOffset(int64_t MinOffset,
+ int64_t MaxOffset) const {
+ int64_t HighPart = MinOffset & 0xfff000;
+ if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) {
+ // Rebase the value to an integer multiple of imm12.
+ return HighPart;
+ }
+
+ return 0;
+}
+
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 25d7cb6d212d1f4..b85f03e872ae7de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -690,6 +690,9 @@ class AArch64TargetLowering : public TargetLowering {
unsigned AS,
Instruction *I = nullptr) const override;
+ int64_t getPreferBaseOffset(int64_t MinOffset,
+ int64_t MaxOffset) const override;
+
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 69c558d9d5599dc..cdd8df8d8adec40 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -209,3 +209,92 @@ define void @t17(i64 %a) {
%3 = load volatile i64, ptr %2, align 8
ret void
}
+
+; https://gcc.godbolt.org/z/ErhhdxMv3
+define i32 @LdOffset_i8(ptr %a) {
+; CHECK-LABEL: LdOffset_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #56952 // =0xde78
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrb w0, [x0, x8]
+; CHECK-NEXT: ret
+ %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+ %val = load i8, ptr %arrayidx, align 1
+ %conv = zext i8 %val to i32
+ ret i32 %conv
+}
+
+define i32 @LdOffset_i16(ptr %a) {
+; CHECK-LABEL: LdOffset_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #48368 // =0xbcf0
+; CHECK-NEXT: movk w8, #31, lsl #16
+; CHECK-NEXT: ldrsh w0, [x0, x8]
+; CHECK-NEXT: ret
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+ %val = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %val to i32
+ ret i32 %conv
+}
+
+define i32 @LdOffset_i32(ptr %a) {
+; CHECK-LABEL: LdOffset_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #31200 // =0x79e0
+; CHECK-NEXT: movk w8, #63, lsl #16
+; CHECK-NEXT: ldr w0, [x0, x8]
+; CHECK-NEXT: ret
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
+ %val = load i32, ptr %arrayidx, align 4
+ ret i32 %val
+}
+
+; https://gcc.godbolt.org/z/q6frE9ePe
+define i64 @LdOffset_i64_multi_offset(ptr %a) {
+; CHECK-LABEL: LdOffset_i64_multi_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #2031, lsl #12 // =8318976
+; CHECK-NEXT: add x8, x8, #960
+; CHECK-NEXT: ldr x9, [x8]
+; CHECK-NEXT: ldr x8, [x8, #2056]
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
+ %val0 = load i64, ptr %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 1040249
+ %val1 = load i64, ptr %arrayidx1, align 8
+ %add = add nsw i64 %val1, %val0
+ ret i64 %add
+}
+
+define i64 @LdOffset_i64_multi_offset_with_commmon_base(ptr %a) {
+; CHECK-LABEL: LdOffset_i64_multi_offset_with_commmon_base:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, #507, lsl #12 // =2076672
+; CHECK-NEXT: ldr x9, [x8, #26464]
+; CHECK-NEXT: ldr x8, [x8, #26496]
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+ %b = getelementptr inbounds i16, ptr %a, i64 1038336
+ %arrayidx = getelementptr inbounds i64, ptr %b, i64 3308
+ %val0 = load i64, ptr %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 3312
+ %val1 = load i64, ptr %arrayidx1, align 8
+ %add = add nsw i64 %val1, %val0
+ ret i64 %add
+}
+
+; Negative test: the offset is odd
+define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a) {
+; CHECK-LABEL: LdOffset_i16_odd_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #56953 // =0xde79
+; CHECK-NEXT: movk w8, #15, lsl #16
+; CHECK-NEXT: ldrsh w0, [x0, x8]
+; CHECK-NEXT: ret
+ %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039993
+ %val = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %val to i32
+ ret i32 %conv
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 080b3dd75ee9a9d..097575ca86bccb1 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -6,18 +6,17 @@
define void @test1(ptr %s, i32 %n) {
; CHECK-LABEL: test1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #40000 // =0x9c40
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: add x9, x9, x10
-; CHECK-NEXT: cmp w8, w1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
+; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w8, [x9, #4]
-; CHECK-NEXT: add w8, w8, #1
-; CHECK-NEXT: str w8, [x9]
-; CHECK-NEXT: cmp w8, w1
+; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: add w9, w9, #1
+; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
; CHECK-NEXT: ret
@@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz x0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
-; CHECK-NEXT: mov w8, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
-; CHECK-NEXT: add x8, x0, x8
+; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #4]
+; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
@@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
; CHECK-NEXT: csel x8, x1, x0, ne
; CHECK-NEXT: cbz x8, .LBB2_3
; CHECK-NEXT: // %bb.1: // %while_cond.preheader
-; CHECK-NEXT: mov w10, #40000 // =0x9c40
; CHECK-NEXT: mov w9, wzr
-; CHECK-NEXT: add x8, x8, x10
+; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #4]
+; CHECK-NEXT: str w9, [x8, #3140]
; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
@@ -141,17 +138,15 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .cfi_personality 156, DW.ref.__FrameHandler
; CHECK-NEXT: .cfi_lsda 28, .Lexception0
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: mov w21, wzr
-; CHECK-NEXT: mov w20, #40000 // =0x9c40
+; CHECK-NEXT: mov w20, wzr
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
@@ -159,23 +154,22 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT: add x8, x0, x20
-; CHECK-NEXT: cmp w21, w19
-; CHECK-NEXT: str wzr, [x8]
+; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864
+; CHECK-NEXT: cmp w20, w19
+; CHECK-NEXT: str wzr, [x8, #3136]
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT: str w21, [x8, #4]
-; CHECK-NEXT: add w21, w21, #1
-; CHECK-NEXT: str w21, [x8]
+; CHECK-NEXT: str w20, [x8, #3140]
+; CHECK-NEXT: add w20, w20, #1
+; CHECK-NEXT: str w20, [x8, #3136]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w20
-; CHECK-NEXT: .cfi_restore w21
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_5: // %cleanup
@@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: add x8, x8, #19, lsl #12 // =77824
-; CHECK-NEXT: add x8, x8, #2176
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #4]
+; CHECK-NEXT: str w9, [x8, #2180]
; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: str w9, [x8]
+; CHECK-NEXT: str w9, [x8, #2176]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
|
9f3cf99
to
0853acf
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - this looks good from what I can see.
int64_t AArch64TargetLowering::getPreferBaseOffset(int64_t MinOffset, | ||
int64_t MaxOffset) const { | ||
int64_t HighPart = MinOffset & 0xfff000; | ||
if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe do `HighPart = MinOffset & ~0xfffULL;`, to try and capture more ranges.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied your comment, thanks.
llvm/lib/CodeGen/CodeGenPrepare.cpp
Outdated
return; | ||
}; | ||
|
||
// Check whether all the offsets can be encoded with perfered common base. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
perfered -> preferred
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
@@ -2721,6 +2721,12 @@ class TargetLoweringBase { | |||
Type *Ty, unsigned AddrSpace, | |||
Instruction *I = nullptr) const; | |||
|
|||
/// Return the perfered common base offset. | |||
virtual int64_t getPreferBaseOffset(int64_t MinOffset, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe call this something like getPreferredLargeGEPBaseOffset?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Applied your comment, thanks.
ret i32 %val | ||
} | ||
|
||
; https://gcc.godbolt.org/z/q6frE9ePe |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove the godbolt links
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
0853acf
to
d859e2c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you. Looks good to me.
d859e2c
to
d6f4d52
Compare
When all the large constant offsets are masked with the same value from bit 12 to bit 23 (the low imm12 will be encoded in the load/store instructions).
Fold
into
Fixes the multi-use scenarios for #71917