diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7d71c316bcb0a..175f6ef49c3ba 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4089,6 +4089,29 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
   return MI.getOperand(Idx);
 }
 
+const MachineOperand &
+AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected opcode");
+  case AArch64::LDRBroX:
+  case AArch64::LDRBBroX:
+  case AArch64::LDRSBXroX:
+  case AArch64::LDRSBWroX:
+  case AArch64::LDRHroX:
+  case AArch64::LDRHHroX:
+  case AArch64::LDRSHXroX:
+  case AArch64::LDRSHWroX:
+  case AArch64::LDRWroX:
+  case AArch64::LDRSroX:
+  case AArch64::LDRSWroX:
+  case AArch64::LDRDroX:
+  case AArch64::LDRXroX:
+  case AArch64::LDRQroX:
+    return MI.getOperand(4);
+  }
+}
+
 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
                                               Register Reg) {
   if (MI.getParent() == nullptr)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 6526f6740747a..db24a19fe5f8e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -111,6 +111,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// Returns the immediate offset operator of a load/store.
   static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
 
+  /// Returns the shift amount operand of a load/store.
+  static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);
+
   /// Returns whether the instruction is FP or NEON.
   static bool isFpOrNEON(const MachineInstr &MI);
 
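Note on the new accessor: for every reg+reg (roX) load form listed above, operand 4 is the shift-amount immediate — compare the MIR test at the end of this patch, where `$w0 = LDRBBroX $x0, $x8, 0, 0` carries base, index, extend, and shift amount in that order. A minimal usage sketch; the helper name hasZeroShiftAmount is hypothetical and not part of the patch:

    // Hypothetical helper, not in the patch: the optimizer only folds a
    // constant index when the register offset is applied unshifted, mirroring
    // the guard in findMatchingConstOffsetBackward further down.
    #include "AArch64InstrInfo.h" // in-tree include path, for illustration

    static bool hasZeroShiftAmount(const llvm::MachineInstr &MI) {
      const llvm::MachineOperand &Amt =
          llvm::AArch64InstrInfo::getLdStAmountOp(MI);
      return Amt.isImm() && Amt.getImm() == 0;
    }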
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dc6d5b8950c34..aa7a4bc235361 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -62,6 +62,8 @@ STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+STATISTIC(NumConstOffsetFolded,
+          "Number of const offsets folded into index addressing");
 
 DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
               "Controls which pairs are considered for renaming");
@@ -75,6 +77,11 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
 static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                      cl::Hidden);
 
+// The LdStConstLimit limits how far we search for const offset instructions
+// when we form index address load/store instructions.
+static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
+                                        cl::init(10), cl::Hidden);
+
 // Enable register renaming to find additional store pairing opportunities.
 static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                     cl::init(true), cl::Hidden);
@@ -171,6 +178,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                 int UnscaledOffset, unsigned Limit);
 
+  // Scan the instruction list to find a register assigned with a const
+  // value that can be combined with the current instruction (a load or store)
+  // using unsigned-offset base addressing. Scan backwards.
+  MachineBasicBlock::iterator
+  findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
+                                  unsigned &Offset);
+
   // Scan the instruction list to find a base register update that can
   // be combined with the current instruction (a load or store) using
   // pre or post indexed addressing with writeback. Scan backwards.
@@ -182,11 +196,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                             unsigned BaseReg, int Offset);
 
+  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
+                              unsigned IndexReg, unsigned &Offset);
+
   // Merge a pre- or post-index base register update into a ld/st instruction.
   MachineBasicBlock::iterator
   mergeUpdateInsn(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Update, bool IsPreIdx);
 
+  MachineBasicBlock::iterator
+  mergeConstOffsetInsn(MachineBasicBlock::iterator I,
+                       MachineBasicBlock::iterator Update, unsigned Offset,
+                       int Scale);
+
   // Find and merge zero store instructions.
   bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
 
@@ -199,6 +221,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and merge a base register updates before or after a ld/st instruction.
   bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
 
+  // Find and merge an index ld/st instruction into a base address ld/st instruction.
+  bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -481,6 +506,42 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
   }
 }
 
+static unsigned getBaseAddressOpcode(unsigned Opc) {
+  // TODO: Add more index address loads/stores.
+  switch (Opc) {
+  default:
+    llvm_unreachable("Opcode has no base address equivalent!");
+  case AArch64::LDRBroX:
+    return AArch64::LDRBui;
+  case AArch64::LDRBBroX:
+    return AArch64::LDRBBui;
+  case AArch64::LDRSBXroX:
+    return AArch64::LDRSBXui;
+  case AArch64::LDRSBWroX:
+    return AArch64::LDRSBWui;
+  case AArch64::LDRHroX:
+    return AArch64::LDRHui;
+  case AArch64::LDRHHroX:
+    return AArch64::LDRHHui;
+  case AArch64::LDRSHXroX:
+    return AArch64::LDRSHXui;
+  case AArch64::LDRSHWroX:
+    return AArch64::LDRSHWui;
+  case AArch64::LDRWroX:
+    return AArch64::LDRWui;
+  case AArch64::LDRSroX:
+    return AArch64::LDRSui;
+  case AArch64::LDRSWroX:
+    return AArch64::LDRSWui;
+  case AArch64::LDRDroX:
+    return AArch64::LDRDui;
+  case AArch64::LDRXroX:
+    return AArch64::LDRXui;
+  case AArch64::LDRQroX:
+    return AArch64::LDRQui;
+  }
+}
+
 static unsigned getPostIndexedOpcode(unsigned Opc) {
   switch (Opc) {
   default:
@@ -722,6 +783,41 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
   }
 }
 
+// Make sure this is a reg+reg Ld/St.
+static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    return false;
+  // Scaled instructions.
+  // TODO: Add more index address stores.
+  case AArch64::LDRBroX:
+  case AArch64::LDRBBroX:
+  case AArch64::LDRSBXroX:
+  case AArch64::LDRSBWroX:
+    Scale = 1;
+    return true;
+  case AArch64::LDRHroX:
+  case AArch64::LDRHHroX:
+  case AArch64::LDRSHXroX:
+  case AArch64::LDRSHWroX:
+    Scale = 2;
+    return true;
+  case AArch64::LDRWroX:
+  case AArch64::LDRSroX:
+  case AArch64::LDRSWroX:
+    Scale = 4;
+    return true;
+  case AArch64::LDRDroX:
+  case AArch64::LDRXroX:
+    Scale = 8;
+    return true;
+  case AArch64::LDRQroX:
+    Scale = 16;
+    return true;
+  }
+}
+
 static bool isRewritableImplicitDef(unsigned Opc) {
   switch (Opc) {
   default:
@@ -2018,6 +2114,63 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   return NextI;
 }
 
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
+                                          MachineBasicBlock::iterator Update,
+                                          unsigned Offset, int Scale) {
+  assert((Update->getOpcode() == AArch64::MOVKWi) &&
+         "Unexpected const mov instruction to merge!");
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
+  MachineInstr &MemMI = *I;
+  unsigned Mask = (1 << 12) * Scale - 1;
+  unsigned Low = Offset & Mask;
+  unsigned High = Offset - Low;
+  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
+  MachineInstrBuilder AddMIB, MemMIB;
+
+  // Add IndexReg, BaseReg, High (the BaseReg may be SP)
+  AddMIB =
+      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
+          .addDef(IndexReg)
+          .addUse(BaseReg)
+          .addImm(High >> 12) // shifted value
+          .addImm(12);        // shift 12
+  (void)AddMIB;
+  // Ld/St DestReg, IndexReg, Imm12
+  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
+  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+               .add(getLdStRegOp(MemMI))
+               .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
+               .addImm(Low / Scale)
+               .setMemRefs(I->memoperands())
+               .setMIFlags(I->mergeFlagsWith(*Update));
+  (void)MemMIB;
+
+  ++NumConstOffsetFolded;
+  LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
+  LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
+  LLVM_DEBUG(PrevI->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "    ");
+  LLVM_DEBUG(Update->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "    ");
+  LLVM_DEBUG(I->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
+  LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "    ");
+  LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "\n");
+
+  // Erase the old instructions for the block.
+  I->eraseFromParent();
+  PrevI->eraseFromParent();
+  Update->eraseFromParent();
+
+  return NextI;
+}
+
 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                                MachineInstr &MI,
                                                unsigned BaseReg, int Offset) {
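The heart of mergeConstOffsetInsn is the Mask/Low/High split: the low 12 + log2(Scale) bits of the offset become the scaled imm12 of the new unsigned-offset load, and the remainder becomes an `add ..., #imm, lsl #12`. A standalone sketch of that arithmetic, using the constants from the LdOffset_i8 and LdOffset_i16 tests further down; splitOffset is an illustrative name, not patch code:

    #include <cassert>

    // Mirrors the Mask/Low/High computation in mergeConstOffsetInsn above.
    static void splitOffset(unsigned Offset, int Scale, unsigned &Low,
                            unsigned &High) {
      unsigned Mask = (1u << 12) * Scale - 1;
      Low = Offset & Mask;  // becomes the scaled imm12 of the new load
      High = Offset - Low;  // becomes "add xN, xBase, #(High >> 12), lsl #12"
    }

    int main() {
      unsigned Low, High;
      splitOffset(1039992, /*Scale=*/1, Low, High); // i8 load at byte 1039992
      assert(High == 1036288 && (High >> 12) == 253 && Low == 3704);
      splitOffset(2079984, /*Scale=*/2, Low, High); // i16 load, byte offset
      assert((High >> 12) == 506 && Low == 7408 && Low / 2 == 3704);
    }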
@@ -2065,6 +2218,31 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
   return false;
 }
 
+bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
+                                                 MachineInstr &MI,
+                                                 unsigned IndexReg,
+                                                 unsigned &Offset) {
+  // The update instruction source and destination register must be the
+  // same as the load/store index register.
+  if (MI.getOpcode() == AArch64::MOVKWi &&
+      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
+
+    // A movz + movk pair holds the large offset of the Ld/St instruction.
+    MachineBasicBlock::iterator B = MI.getParent()->begin();
+    MachineBasicBlock::iterator MBBI = &MI;
+    MBBI = prev_nodbg(MBBI, B);
+    MachineInstr &MovzMI = *MBBI;
+    if (MovzMI.getOpcode() == AArch64::MOVZWi) {
+      unsigned Low = MovzMI.getOperand(1).getImm();
+      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
+      Offset = High + Low;
+      // 12-bit optionally shifted immediates are legal for adds.
+      return Offset >> 24 == 0;
+    }
+  }
+  return false;
+}
+
 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
     MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
   MachineBasicBlock::iterator E = I->getParent()->end();
@@ -2220,6 +2398,60 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
   return E;
 }
 
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
+    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
+  MachineBasicBlock::iterator B = I->getParent()->begin();
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineInstr &MemMI = *I;
+  MachineBasicBlock::iterator MBBI = I;
+
+  // If the load is the first instruction in the block, there's obviously
+  // not any matching load or store.
+  if (MBBI == B)
+    return E;
+
+  // Make sure the IndexReg is killed and the shift amount is zero.
+  // TODO: Relax this restriction to allow extends; keep processing simple for now.
+  if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
+      !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
+      (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
+    return E;
+
+  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
+
+  // Track which register units have been modified and used between the first
+  // insn (inclusive) and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  unsigned Count = 0;
+  do {
+    MBBI = prev_nodbg(MBBI, B);
+    MachineInstr &MI = *MBBI;
+
+    // Don't count transient instructions towards the search limit since there
+    // may be different numbers of them if e.g. debug information is present.
+    if (!MI.isTransient())
+      ++Count;
+
+    // If we found a match, return it.
+    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
+      return MBBI;
+    }
+
+    // Update the status of what the instruction clobbered and used.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+    // Otherwise, if the index register is used or modified, we have no match,
+    // so return early.
+    if (!ModifiedRegUnits.available(IndexReg) ||
+        !UsedRegUnits.available(IndexReg))
+      return E;
+
+  } while (MBBI != B && Count < Limit);
+  return E;
+}
+
 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
     MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
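isMatchingMovConstInsn only fires on a movk whose destination is the load's killed index register, and it rebuilds the constant from the movz/movk pair; the `Offset >> 24 == 0` bound guarantees that the high part fits the add's 12-bit, lsl #12 immediate. A standalone sketch with the constants used by the tests below; matchMovzMovk is an illustrative name, not patch code:

    #include <cassert>

    // Rebuilds the constant the way isMatchingMovConstInsn above does, from
    // the movz payload and the shifted movk payload.
    static bool matchMovzMovk(unsigned MovzImm, unsigned MovkImm,
                              unsigned MovkShift, unsigned &Offset) {
      Offset = (MovkImm << MovkShift) + MovzImm;
      // The high part must fit the 12-bit "add ..., lsl #12" immediate, so
      // the whole offset must stay below 1 << 24.
      return Offset >> 24 == 0;
    }

    int main() {
      unsigned Offset;
      // From LdOffset_i8: mov w8, #56952; movk w8, #15, lsl #16.
      assert(matchMovzMovk(56952, 15, 16, Offset) && Offset == 1039992);
      // From LdOffset_i8_too_large: 16777217 == 0x1000001 needs bit 24.
      assert(!matchMovzMovk(1, 256, 16, Offset));
    }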
@@ -2404,6 +2636,34 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
   return false;
 }
 
+bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
+                                              int Scale) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  MachineBasicBlock::iterator Update;
+
+  // Don't know how to handle unscaled pre/post-index versions below, so bail.
+  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
+    return false;
+
+  // Look back to try to find a const offset for the index ld/st instruction.
+  // For example,
+  // mov x8, #LargeImm   ; = a * (1<<12) + imm12
+  // ldr x1, [x0, x8]
+  // merged into:
+  // add x8, x0, a * (1<<12)
+  // ldr x1, [x8, imm12]
+  unsigned Offset;
+  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
+  if (Update != E && (Offset & (Scale - 1)) == 0) {
+    // Merge the imm12 into the ld/st.
+    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool EnableNarrowZeroStOpt) {
 
@@ -2482,6 +2742,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
       ++MBBI;
   }
 
+  // 5) Find a register assigned with a const value that can be folded
+  // into the load or store. e.g.,
+  //        mov x8, #LargeImm   ; = a * (1<<12) + imm12
+  //        ldr x1, [x0, x8]
+  //        ; becomes
+  //        add x8, x0, a * (1<<12)
+  //        ldr x1, [x8, imm12]
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    int Scale;
+    if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
+      Modified = true;
+    else
+      ++MBBI;
+  }
+
   return Modified;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 3d4749a7b8e7d..bfef61abd8c12 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -210,12 +210,24 @@ define void @t17(i64 %a) {
   ret void
 }
 
-define i32 @LdOffset_i8(ptr %a) {
+; LDRBBroX
+define i8 @LdOffset_i8(ptr %a) {
 ; CHECK-LABEL: LdOffset_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  ret i8 %val
+}
+
+; LDRBBroX
+define i32 @LdOffset_i8_zext32(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_zext32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -223,12 +235,76 @@ define i32 @LdOffset_i8(ptr %a) {
   ret i32 %conv
 }
 
-define i32 @LdOffset_i16(ptr %a) {
+; LDRSBWroX
+define i32 @LdOffset_i8_sext32(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_sext32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w0, [x8, #3704]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = sext i8 %val to i32
+  ret i32 %conv
+}
+
+; LDRBBroX
+define i64 @LdOffset_i8_zext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_zext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %val to i64
+  ret i64 %conv
+}
+
+; LDRSBXroX
+define i64 @LdOffset_i8_sext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_sext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb x0, [x8, #3704]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = sext i8 %val to i64
+  ret i64 %conv
+}
+
+; LDRHHroX
+define i16 @LdOffset_i16(ptr %a) {
 ; CHECK-LABEL: LdOffset_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    ldrsh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  ret i16 %val
+}
+
+; LDRHHroX
+define i32 @LdOffset_i16_zext32(ptr %a) {
+; CHECK-LABEL: LdOffset_i16_zext32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = zext i16 %val to i32
+  ret i32 %conv
+}
+
+; LDRSHWroX
+define i32 @LdOffset_i16_sext32(ptr %a) {
+; CHECK-LABEL: LdOffset_i16_sext32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -236,18 +312,162 @@ define i32 @LdOffset_i16(ptr %a) {
   ret i32 %conv
 }
 
+; LDRHHroX
+define i64 @LdOffset_i16_zext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i16_zext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = zext i16 %val to i64
+  ret i64 %conv
+}
+
+; LDRSHXroX
+define i64 @LdOffset_i16_sext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i16_sext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh x0, [x8, #7408]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = sext i16 %val to i64
+  ret i64 %conv
+}
+
+; LDRWroX
 define i32 @LdOffset_i32(ptr %a) {
 ; CHECK-LABEL: LdOffset_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    ldr w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 4
   ret i32 %val
 }
 
+; LDRWroX
+define i64 @LdOffset_i32_zext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i32_zext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
+  %val = load i32, ptr %arrayidx, align 2
+  %conv = zext i32 %val to i64
+  ret i64 %conv
+}
+
+; LDRSWroX
+define i64 @LdOffset_i32_sext64(ptr %a) {
+; CHECK-LABEL: LdOffset_i32_sext64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldrsw x0, [x8, #14816]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
+  %val = load i32, ptr %arrayidx, align 2
+  %conv = sext i32 %val to i64
+  ret i64 %conv
+}
+
+; LDRXroX
+define i64 @LdOffset_i64(ptr %a) {
+; CHECK-LABEL: LdOffset_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr x0, [x8, #29632]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
+  %val = load i64, ptr %arrayidx, align 4
+  ret i64 %val
+}
+
+; LDRDroX
+define <2 x i32> @LdOffset_v2i32(ptr %a) {
+; CHECK-LABEL: LdOffset_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
+  %val = load <2 x i32>, ptr %arrayidx, align 4
+  ret <2 x i32> %val
+}
+
+; LDRQroX
+define <2 x i64> @LdOffset_v2i64(ptr %a) {
+; CHECK-LABEL: LdOffset_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #4048, lsl #12 // =16580608
+; CHECK-NEXT:    ldr q0, [x8, #59264]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
+  %val = load <2 x i64>, ptr %arrayidx, align 4
+  ret <2 x i64> %val
+}
+
+; LDRSBWroX
+define double @LdOffset_i8_f64(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w8, [x8, #3704]
+; CHECK-NEXT:    scvtf d0, w8
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = sitofp i8 %val to double
+  ret double %conv
+}
+
+; LDRSHWroX
+define double @LdOffset_i16_f64(ptr %a) {
+; CHECK-LABEL: LdOffset_i16_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w8, [x8, #7408]
+; CHECK-NEXT:    scvtf d0, w8
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
+  %val = load i16, ptr %arrayidx, align 2
+  %conv = sitofp i16 %val to double
+  ret double %conv
+}
+
+; LDRSroX
+define double @LdOffset_i32_f64(ptr %a) {
+; CHECK-LABEL: LdOffset_i32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr s0, [x8, #14816]
+; CHECK-NEXT:    ucvtf d0, d0
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
+  %val = load i32, ptr %arrayidx, align 4
+  %conv = uitofp i32 %val to double
+  ret double %conv
+}
+
+; LDRDroX
+define double @LdOffset_i64_f64(ptr %a) {
+; CHECK-LABEL: LdOffset_i64_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
+; CHECK-NEXT:    scvtf d0, d0
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
+  %val = load i64, ptr %arrayidx, align 8
+  %conv = sitofp i64 %val to double
+  ret double %conv
+}
+
 define i64 @LdOffset_i64_multi_offset(ptr %a) {
 ; CHECK-LABEL: LdOffset_i64_multi_offset:
 ; CHECK:       // %bb.0:
@@ -295,3 +515,27 @@ define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a) {
   ret i32 %conv
 }
 
+; Already encodable with a single mov (MOVNWi).
+define i8 @LdOffset_i8_movnwi(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_movnwi:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16777215 // =0xffffff
+; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 16777215
+  %val = load i8, ptr %arrayidx, align 1
+  ret i8 %val
+}
+
+; Negative test: the offset is too large to be encoded with an add.
+define i8 @LdOffset_i8_too_large(ptr %a) {
+; CHECK-LABEL: LdOffset_i8_too_large:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    movk w8, #256, lsl #16
+; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    ret
+  %arrayidx = getelementptr inbounds i8, ptr %a, i64 16777217
+  %val = load i8, ptr %arrayidx, align 1
+  ret i8 %val
+}
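The two trailing tests above probe the legality bounds from isMatchingMovConstInsn: 16777217 (0x1000001) fails the `Offset >> 24 == 0` check, while 16777215 passes the range check but is materialized by a single MOVNWi, so there is no movz+movk pair for the pass to fold. A standalone sketch reproducing the arithmetic side, assuming an i8 load (Scale = 1):

    #include <cassert>
    #include <cstdint>

    int main() {
      auto fitsAddLsl12PlusImm12 = [](uint64_t Offset) {
        return (Offset >> 24) == 0; // high part must fit "add ..., lsl #12"
      };
      assert(fitsAddLsl12PlusImm12(1039992));   // folded in the tests above
      assert(!fitsAddLsl12PlusImm12(16777217)); // LdOffset_i8_too_large
      // 16777215 would fit, but a single MOVNWi materializes it, and the
      // pass only matches a MOVZWi + MOVKWi pair (LdOffset_i8_movnwi).
      assert(fitsAddLsl12PlusImm12(16777215));
    }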
diff --git a/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir
new file mode 100755
index 0000000000000..15b6700398ea0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass aarch64-ldst-opt %s -o - | FileCheck %s
+
+
+---
+name: LdOffset
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: LdOffset
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $x8 = ADDXri $x0, 253, 12
+    ; CHECK-NEXT: renamable $w0 = LDRBBui killed renamable $x8, 3704
+    ; CHECK-NEXT: RET undef $lr, implicit $w0
+    renamable $w8 = MOVZWi 56952, 0
+    renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
+    renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0
+    RET undef $lr, implicit $w0
+...
+
+# Negative test: the IndexReg is missing the killed flag.
+---
+name: LdOffset_missing_killed
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: LdOffset_missing_killed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $w8 = MOVZWi 56952, 0
+    ; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
+    ; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, renamable $x8, 0, 0
+    ; CHECK-NEXT: RET undef $lr, implicit $w0
+    renamable $w8 = MOVZWi 56952, 0
+    renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
+    renamable $w0 = LDRBBroX killed renamable $x0, renamable $x8, 0, 0
+    RET undef $lr, implicit $w0
+...