From 0bc120575ed0c36efef5a2421282186d761eb7cd Mon Sep 17 00:00:00 2001
From: David Green
Date: Mon, 4 Mar 2024 17:40:50 +0000
Subject: [PATCH] [Codegen] Make Width in getMemOperandsWithOffsetWidth a LocationSize.

This is another part of #70452, which makes getMemOperandsWithOffsetWidth
use a LocationSize for Width, as opposed to the unsigned it currently uses.
The advantage on its own is modest, as getMemOperandsWithOffsetWidth usually
reports known sizes, but when the values come from an MMO it can be more
accurate in case they are Unknown (and, in the future, scalable).
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 2 +-
 llvm/lib/CodeGen/MachineScheduler.cpp | 13 ++++----
 llvm/lib/CodeGen/TargetInstrInfo.cpp | 2 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.h | 2 +-
 .../lib/Target/AMDGPU/SIInsertHardClauses.cpp | 2 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +-
 llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 8 ++---
 llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 4 +--
 llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 11 ++++---
 llvm/lib/Target/Lanai/LanaiInstrInfo.cpp | 11 ++++---
 llvm/lib/Target/Lanai/LanaiInstrInfo.h | 4 +--
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 15 +++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.h | 4 +--
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 11 ++++---
 llvm/lib/Target/RISCV/RISCVInstrInfo.h | 4 +--
 llvm/lib/Target/X86/X86InstrInfo.cpp | 2 +-
 llvm/lib/Target/X86/X86InstrInfo.h | 2 +-
 llvm/test/CodeGen/AMDGPU/cluster_stores.ll | 32 +++++++++----------
 .../Target/RISCV/RISCVInstrInfoTest.cpp | 2 +-
 21 files changed, 73 insertions(+), 68 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index e7787aafb98e2..be4ee5b6f9e29 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1446,7 +1446,7 @@ class TargetInstrInfo : public MCInstrInfo { /// abstraction that supports negative offsets.
virtual bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { return false; } diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 3bbd126bdaf1a..0d5bf32993878 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1729,11 +1729,11 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { SUnit *SU; SmallVector BaseOps; int64_t Offset; - unsigned Width; + LocationSize Width; bool OffsetIsScalable; MemOpInfo(SUnit *SU, ArrayRef BaseOps, - int64_t Offset, bool OffsetIsScalable, unsigned Width) + int64_t Offset, bool OffsetIsScalable, LocationSize Width) : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), Width(Width), OffsetIsScalable(OffsetIsScalable) {} @@ -1866,11 +1866,12 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( auto MemOpb = MemOpRecords[NextIdx]; unsigned ClusterLength = 2; - unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width; + unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() + + MemOpb.Width.getValue().getKnownMinValue(); if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) { ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1; - CurrentClusterBytes = - SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width; + CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + + MemOpb.Width.getValue().getKnownMinValue(); } if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset, @@ -1940,7 +1941,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords( SmallVector BaseOps; int64_t Offset; bool OffsetIsScalable; - unsigned Width; + LocationSize Width = 0; if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI)) { MemOpRecords.push_back( diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 4783742a14ad7..5b02c1bc39c0a 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1365,7 +1365,7 @@ bool TargetInstrInfo::getMemOperandWithOffset( const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const { SmallVector BaseOps; - unsigned Width; + LocationSize Width = 0; if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI) || BaseOps.size() != 1) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 17e0e36ee6821..5df691f35275d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2675,7 +2675,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 6c6689091ead4..2f10f80f4bdf7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -155,7 +155,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo 
{ bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`. diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp index 01580fe345ba2..dcc60765cc203 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -208,7 +208,7 @@ class SIInsertHardClauses : public MachineFunctionPass { int64_t Dummy1; bool Dummy2; - unsigned Dummy3; + LocationSize Dummy3 = 0; SmallVector BaseOps; if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index edd87e340d10d..978e0d1f39fea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -360,7 +360,7 @@ static bool isStride64(unsigned Opc) { bool SIInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; @@ -424,7 +424,7 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth( DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); Width = getOpSize(LdSt, DataOpIdx); DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); - Width += getOpSize(LdSt, DataOpIdx); + Width = Width.getValue() + getOpSize(LdSt, DataOpIdx); } else { Width = getOpSize(LdSt, DataOpIdx); } @@ -3647,7 +3647,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, const MachineInstr &MIb) const { SmallVector BaseOps0, BaseOps1; int64_t Offset0, Offset1; - unsigned Dummy0, Dummy1; + LocationSize Dummy0 = 0, Dummy1 = 0; bool Offset0IsScalable, Offset1IsScalable; if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable, Dummy0, &RI) || diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index dab2cb2946ac9..a62bf779fe2e2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -240,7 +240,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, + bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final; bool shouldClusterMemOps(ArrayRef BaseOps1, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 619c7dc69f9b2..b9bf26ba7cca1 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -3070,7 +3070,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, /// Get the base register and byte offset of a load/store instr. 
bool HexagonInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { OffsetIsScalable = false; const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width); @@ -3286,9 +3286,9 @@ unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { // returned in Offset and the access size is returned in AccessSize. // If the base operand has a subregister or the offset field does not contain // an immediate value, return nullptr. -MachineOperand *HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, - int64_t &Offset, - unsigned &AccessSize) const { +MachineOperand * +HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, + LocationSize &AccessSize) const { // Return if it is not a base+offset type instruction or a MemOp. if (getAddrMode(MI) != HexagonII::BaseImmOffset && getAddrMode(MI) != HexagonII::BaseLongOffset && diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index e496995d3ff12..4efc62fd717c6 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -208,7 +208,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, + bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; /// Reverses the branch condition of the specified condition list, @@ -437,7 +437,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { unsigned getAddrMode(const MachineInstr &MI) const; MachineOperand *getBaseAndOffset(const MachineInstr &MI, int64_t &Offset, - unsigned &AccessSize) const; + LocationSize &AccessSize) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 4df811f188df6..2d320e6b0cad7 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -395,10 +395,11 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) { HII.getAddrMode(L0) != HexagonII::BaseImmOffset) continue; int64_t Offset0; - unsigned Size0; + LocationSize Size0 = 0; MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0); // Is the access size is longer than the L1 cache line, skip the check. - if (BaseOp0 == nullptr || !BaseOp0->isReg() || Size0 >= 32) + if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() || + Size0.getValue() >= 32) continue; // Scan only up to 32 instructions ahead (to avoid n^2 complexity). 
for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) { @@ -408,10 +409,10 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) { HII.getAddrMode(L1) != HexagonII::BaseImmOffset) continue; int64_t Offset1; - unsigned Size1; + LocationSize Size1 = 0; MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1); - if (BaseOp1 == nullptr || !BaseOp1->isReg() || Size1 >= 32 || - BaseOp0->getReg() != BaseOp1->getReg()) + if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size1.hasValue() || + Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg()) continue; // Check bits 3 and 4 of the offset: if they differ, a bank conflict // is unlikely. diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index 4fe725b9457fa..b8a37435f5a64 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -102,14 +102,15 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint( const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; - unsigned int WidthA = 0, WidthB = 0; + LocationSize WidthA = 0, WidthB = 0; if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); - int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; - if (LowOffset + LowWidth <= HighOffset) + LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowWidth.hasValue() && + LowOffset + (int)LowWidth.getValue() <= HighOffset) return true; } } @@ -752,7 +753,7 @@ Register LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI, bool LanaiInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo * /*TRI*/) const { + LocationSize &Width, const TargetRegisterInfo * /*TRI*/) const { // Handle only loads/stores with base register followed by immediate offset // and with add as ALU op.
if (LdSt.getNumOperands() != 4) @@ -793,7 +794,7 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth( bool LanaiInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { switch (LdSt.getOpcode()) { default: diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h index 189aedf07120f..8ad2b9237c928 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h @@ -70,12 +70,12 @@ class LanaiInstrInfo : public LanaiGenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, + bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, + int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const; std::pair diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 1c610b269d32d..5d37e929f8755 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2821,7 +2821,7 @@ bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const { bool PPCInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { const MachineOperand *BaseOp; OffsetIsScalable = false; @@ -2913,7 +2913,7 @@ bool PPCInstrInfo::shouldClusterMemOps( return false; int64_t Offset1 = 0, Offset2 = 0; - unsigned Width1 = 0, Width2 = 0; + LocationSize Width1 = 0, Width2 = 0; const MachineOperand *Base1 = nullptr, *Base2 = nullptr; if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || @@ -2924,7 +2924,7 @@ bool PPCInstrInfo::shouldClusterMemOps( "getMemOperandWithOffsetWidth return incorrect base op"); // The caller should already have ordered FirstMemOp/SecondMemOp by offset. assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); - return Offset1 + Width1 == Offset2; + return Offset1 + (int64_t)Width1.getValue() == Offset2; } /// GetInstSize - Return the number of bytes of code the specified @@ -5504,7 +5504,7 @@ MachineInstr *PPCInstrInfo::findLoopInstr( // memory width. Width is the size of memory that is being loaded/stored. 
bool PPCInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo *TRI) const { + LocationSize &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) return false; @@ -5542,14 +5542,15 @@ bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; - unsigned int WidthA = 0, WidthB = 0; + LocationSize WidthA = 0, WidthB = 0; if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); - int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; - if (LowOffset + LowWidth <= HighOffset) + LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowWidth.hasValue() && + LowOffset + (int)LowWidth.getValue() <= HighOffset) return true; } } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 43ed668a941bb..045932dc0d3ba 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -543,7 +543,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// loaded/stored (e.g. 1, 2, 4, 8). bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, + int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const; bool optimizeCmpPostRA(MachineInstr &MI) const; @@ -553,7 +553,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, + bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; /// Returns true if the two given memory operations should be scheduled diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 2abe015c9f9cd..58aeac83abf71 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2195,7 +2195,7 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; @@ -2300,7 +2300,7 @@ bool RISCVInstrInfo::shouldClusterMemOps( // function) and set it as appropriate. 
bool RISCVInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, - unsigned &Width, const TargetRegisterInfo *TRI) const { + LocationSize &Width, const TargetRegisterInfo *TRI) const { if (!LdSt.mayLoadOrStore()) return false; @@ -2339,14 +2339,15 @@ bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; - unsigned int WidthA = 0, WidthB = 0; + LocationSize WidthA = 0, WidthB = 0; if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { if (BaseOpA->isIdenticalTo(*BaseOpB)) { int LowOffset = std::min(OffsetA, OffsetB); int HighOffset = std::max(OffsetA, OffsetB); - int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; - if (LowOffset + LowWidth <= HighOffset) + LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; + if (LowWidth.hasValue() && + LowOffset + (int)LowWidth.getValue() <= HighOffset) return true; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 2d56734259963..8a312ee5e7795 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -156,7 +156,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; bool shouldClusterMemOps(ArrayRef BaseOps1, @@ -168,7 +168,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, - int64_t &Offset, unsigned &Width, + int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const; bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 0f21880f6df90..fe6e835b5d1ff 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4514,7 +4514,7 @@ bool X86InstrInfo::preservesZeroValueInReg( bool X86InstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &MemOp, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const { const MCInstrDesc &Desc = MemOp.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 996a24d9e8a94..0e5fcbeda08f7 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -375,7 +375,7 @@ class X86InstrInfo final : public X86GenInstrInfo { bool getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, int64_t &Offset, - bool &OffsetIsScalable, unsigned &Width, + bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override; bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll index 46b237b710691..b6948dab6bf9f 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll +++ 
b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll @@ -11,11 +11,11 @@ ; DBG-LABEL: cluster_load_cluster_store: -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]]) ; DBG: Cluster ld/st SU([[L2]]) - SU([[L3:[0-9]+]]) @@ -136,11 +136,11 @@ bb: ; DBG-LABEL: cluster_load_valu_cluster_store: -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4 +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(4) ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]]) ; DBG: Cluster ld/st SU([[L2]]) - SU([[L3:[0-9]+]]) @@ -266,8 +266,8 @@ bb: ; Cluster loads from the same texture with different coordinates ; DBG-LABEL: cluster_image_load: -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]] ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_LOAD ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_LOAD @@ -335,8 +335,8 @@ entry: ; Don't cluster loads from different textures ; DBG-LABEL: no_cluster_image_load: -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) +; DBG: Num BaseOps: {{[1-9]+}}, 
Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) ; DBG-NOT: {{^}}Cluster ld/st define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) { ; GFX9-LABEL: no_cluster_image_load: @@ -387,8 +387,8 @@ entry: ; Cluster loads from the same texture and sampler with different coordinates ; DBG-LABEL: cluster_image_sample: -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 -; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16 +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) +; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: LocationSize::precise(16) ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]] ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_SAMPLE ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_SAMPLE diff --git a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp index 5f3ce53f5d274..c690466071a1c 100644 --- a/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp +++ b/llvm/unittests/Target/RISCV/RISCVInstrInfoTest.cpp @@ -163,7 +163,7 @@ TEST_P(RISCVInstrInfoTest, GetMemOperandsWithOffsetWidth) { DebugLoc DL; SmallVector BaseOps; - unsigned Width; + LocationSize Width = 0; int64_t Offset; bool OffsetIsScalable;