Skip to content

Cherry-pick a number of fixes #163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions llvm/lib/Analysis/MemorySSAUpdater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,6 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
const ValueToValueMapTy &VMap,
PhiToDefMap &MPhiMap,
bool CloneWasSimplified,
MemorySSA *MSSA) {
MemoryAccess *InsnDefining = MA;
if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) {
Expand All @@ -578,18 +577,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
if (Instruction *NewDefMUDI =
cast_or_null<Instruction>(VMap.lookup(DefMUDI))) {
InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
if (!CloneWasSimplified)
assert(InsnDefining && "Defining instruction cannot be nullptr.");
else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
// The clone was simplified, it's no longer a MemoryDef, look up.
auto DefIt = DefMUD->getDefsIterator();
// Since simplified clones only occur in single block cloning, a
// previous definition must exist, otherwise NewDefMUDI would not
// have been found in VMap.
assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() &&
"Previous def must exist");
InsnDefining = getNewDefiningAccessForClone(
&*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA);
DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA);
}
}
}
Expand Down Expand Up @@ -624,9 +615,9 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
NewInsn,
getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap,
MPhiMap, CloneWasSimplified, MSSA),
MPhiMap, MSSA),
/*Template=*/CloneWasSimplified ? nullptr : MUD,
/*CreationMustSucceed=*/CloneWasSimplified ? false : true);
/*CreationMustSucceed=*/false);
if (NewUseOrDef)
MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
}
Expand Down
30 changes: 27 additions & 3 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Fold the return instruction into the LDM.
DeleteRet = true;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
// We 'restore' LR into PC so it is not live out of the return block:
// Clear Restored bit.
Info.setRestored(false);
}

// If NoGap is true, pop consecutive registers and then leave the rest
Expand Down Expand Up @@ -2769,6 +2766,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

// Runs the generic TargetFrameLowering hook and then, if callee-saved info is
// valid, decides whether the callee-saved entry for LR can have its Restored
// bit cleared.
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  if (!FrameInfo.isCalleeSavedInfoValid())
    return;

  // A terminator is acceptable when it is not a return at all, or when it is
  // one of the return opcodes listed below.
  auto IsNonReturnOrPopRet = [](const MachineInstr &Term) {
    if (!Term.isReturn())
      return true;
    switch (Term.getOpcode()) {
    case ARM::LDMIA_RET:
    case ARM::t2LDMIA_RET:
    case ARM::tPOP_RET:
      return true;
    default:
      return false;
    }
  };
  auto BlockQualifies = [&](const MachineBasicBlock &MBB) {
    return all_of(MBB.terminators(), IsNonReturnOrPopRet);
  };

  // If no terminator implicitly uses LR, LR is 'restored' into PC and is
  // therefore not live out of the return block: clear the Restored bit on the
  // LR entry in that case.
  for (CalleeSavedInfo &CSI : FrameInfo.getCalleeSavedInfo()) {
    if (CSI.getReg() != ARM::LR)
      continue;
    if (!all_of(MF, BlockQualifies))
      continue;
    CSI.setRestored(false);
    break;
  }
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const {
TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/ARM/ARMFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ class ARMFrameLowering : public TargetFrameLowering {
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;

/// Calls the TargetFrameLowering implementation, then clears the Restored bit
/// on the callee-saved entry for LR when every return terminator in \p MF is
/// LDMIA_RET, t2LDMIA_RET or tPOP_RET.
void processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS = nullptr) const override;

void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;

Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Transforms/Utils/FunctionComparator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ int FunctionComparator::cmpConstants(const Constant *L,
case Value::ConstantExprVal: {
const ConstantExpr *LE = cast<ConstantExpr>(L);
const ConstantExpr *RE = cast<ConstantExpr>(R);
if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode()))
return Res;
unsigned NumOperandsL = LE->getNumOperands();
unsigned NumOperandsR = RE->getNumOperands();
if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
Expand All @@ -401,6 +403,29 @@ int FunctionComparator::cmpConstants(const Constant *L,
cast<Constant>(RE->getOperand(i))))
return Res;
}
if (LE->isCompare())
if (int Res = cmpNumbers(LE->getPredicate(), RE->getPredicate()))
return Res;
if (auto *GEPL = dyn_cast<GEPOperator>(LE)) {
auto *GEPR = cast<GEPOperator>(RE);
if (int Res = cmpTypes(GEPL->getSourceElementType(),
GEPR->getSourceElementType()))
return Res;
if (int Res = cmpNumbers(GEPL->isInBounds(), GEPR->isInBounds()))
return Res;
if (int Res = cmpNumbers(GEPL->getInRangeIndex().value_or(unsigned(-1)),
GEPR->getInRangeIndex().value_or(unsigned(-1))))
return Res;
}
if (auto *OBOL = dyn_cast<OverflowingBinaryOperator>(LE)) {
auto *OBOR = cast<OverflowingBinaryOperator>(RE);
if (int Res =
cmpNumbers(OBOL->hasNoUnsignedWrap(), OBOR->hasNoUnsignedWrap()))
return Res;
if (int Res =
cmpNumbers(OBOL->hasNoSignedWrap(), OBOR->hasNoSignedWrap()))
return Res;
}
return 0;
}
case Value::BlockAddressVal: {
Expand Down
119 changes: 119 additions & 0 deletions llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=thumbv7m-none-none-eabi < %s | FileCheck %s

target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

%struct.wibble = type { [30 x i8], i8, i32 }
%struct.eggs = type { i32, [30 x i8], i8, i8, i8, [3 x i8] }

@global = external global [3 x %struct.wibble], align 4
@global.1 = external global [3 x %struct.wibble], align 4

; Test case to make sure calling an outlined function does not clobber LR used
; by a tail call in caller.
define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 noundef zeroext %arg2) unnamed_addr #0 {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %bb
; CHECK-NEXT: cmp r1, #2
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %bb
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: bne .LBB0_5
; CHECK-NEXT: @ %bb.2: @ %bb4
; CHECK-NEXT: movs r1, #1
; CHECK-NEXT: strb.w r1, [r0, #36]
; CHECK-NEXT: movs r1, #30
; CHECK-NEXT: strb.w r1, [r0, #34]
; CHECK-NEXT: add.w r1, r2, r2, lsl #3
; CHECK-NEXT: ldr r2, .LCPI0_1
; CHECK-NEXT: b .LBB0_4
; CHECK-NEXT: .LBB0_3: @ %bb14
; CHECK-NEXT: movs r1, #1
; CHECK-NEXT: strb.w r1, [r0, #36]
; CHECK-NEXT: movs r1, #30
; CHECK-NEXT: strb.w r1, [r0, #34]
; CHECK-NEXT: add.w r1, r2, r2, lsl #3
; CHECK-NEXT: ldr r2, .LCPI0_0
; CHECK-NEXT: .LBB0_4: @ %bb4
; CHECK-NEXT: add.w r1, r2, r1, lsl #2
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: movs r2, #30
; CHECK-NEXT: b __aeabi_memcpy
; CHECK-NEXT: .LBB0_5: @ %bb24
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl wombat
; CHECK-NEXT: @APP
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long global.1
; CHECK-NEXT: .LCPI0_1:
; CHECK-NEXT: .long global
bb:
%gep = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 4
%zext = zext i8 %arg2 to i32
switch i32 %arg1, label %bb24 [
i32 1, label %bb4
i32 2, label %bb14
]

bb4: ; preds = %bb
store i8 1, ptr %gep, align 4, !tbaa !6
%gep5 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext
%gep6 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 2
%load = load i32, ptr %gep6, align 4, !tbaa !11
%gep7 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 1
%load8 = load i8, ptr %gep7, align 2, !tbaa !13
%gep9 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3
%gep10 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2
store i8 30, ptr %gep10, align 2, !tbaa !16
%gep11 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1
tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep11, ptr noundef nonnull align 4 dereferenceable(30) %gep5, i32 30, i1 false)
br label %bb26

bb14: ; preds = %bb
store i8 1, ptr %gep, align 4, !tbaa !6
%gep16 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext
%gep17 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 2
%load18 = load i32, ptr %gep17, align 4, !tbaa !21
%gep19 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 1
%load20 = load i8, ptr %gep19, align 2, !tbaa !23
%gep21 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3
%gep22 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2
store i8 30, ptr %gep22, align 2, !tbaa !16
%gep23 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1
tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep23, ptr noundef nonnull align 4 dereferenceable(30) %gep16, i32 30, i1 false)
br label %bb26

bb24: ; preds = %bb
tail call void @wombat()
tail call void asm sideeffect "", ""()
br label %bb26

bb26: ; preds = %bb24, %bb14, %bb4
ret void
}

declare void @wombat()

declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #2

attributes #0 = { minsize noimplicitfloat nounwind optsize }

!6 = !{!7, !9, i64 36}
!7 = !{!"", !8, i64 0, !9, i64 4, !9, i64 34, !9, i64 35, !9, i64 36, !9, i64 37}
!8 = !{!"long", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C/C++ TBAA"}
!11 = !{!12, !8, i64 32}
!12 = !{!"B", !9, i64 0, !9, i64 30, !8, i64 32}
!13 = !{!12, !9, i64 30}
!14 = !{!7, !8, i64 0}
!15 = !{!7, !9, i64 35}
!16 = !{!7, !9, i64 34}
!21 = !{!22, !8, i64 32}
!22 = !{!"A", !9, i64 0, !9, i64 30, !8, i64 32}
!23 = !{!22, !9, i64 30}
86 changes: 86 additions & 0 deletions llvm/test/Transforms/MergeFunc/constexpr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; RUN: opt -S -passes=mergefunc -mergefunc-use-aliases < %s | FileCheck %s

@g1 = external unnamed_addr global i8
@g2 = external unnamed_addr global i8

;.
; CHECK: @g1 = external unnamed_addr global i8
; CHECK: @g2 = external unnamed_addr global i8
;.
; icmp eq constant expression. The expected output keeps this body, i.e. it is
; not merged with @f2, which differs only in the icmp predicate (ne).
define i1 @f1() unnamed_addr {
; CHECK-LABEL: define i1 @f1() unnamed_addr {
; CHECK-NEXT: ret i1 icmp eq (ptr @g1, ptr @g2)
;
ret i1 icmp eq (ptr @g1, ptr @g2)
}

; icmp ne constant expression. The expected output keeps this body, i.e. it is
; not merged with @f1, which differs only in the icmp predicate (eq).
define i1 @f2() unnamed_addr {
; CHECK-LABEL: define i1 @f2() unnamed_addr {
; CHECK-NEXT: ret i1 icmp ne (ptr @g1, ptr @g2)
;
ret i1 icmp ne (ptr @g1, ptr @g2)
}

; GEP constant expression with the inbounds flag. The expected output keeps
; this body distinct from @f5, which is the same GEP without inbounds.
define ptr @f3() unnamed_addr {
; CHECK-LABEL: define ptr @f3() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr @g1, i64 2)
;
ret ptr getelementptr inbounds (i8, ptr @g1, i64 2)
}

; GEP constant expression over i16. The expected output keeps this body
; distinct from @f5, which differs only in the source element type (i8).
define ptr @f4() unnamed_addr {
; CHECK-LABEL: define ptr @f4() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr (i16, ptr @g1, i64 2)
;
ret ptr getelementptr (i16, ptr @g1, i64 2)
}

; Plain GEP constant expression over i8 (no inbounds). Baseline for @f3
; (adds inbounds) and @f4 (uses i16 as the source element type).
define ptr @f5() unnamed_addr {
; CHECK-LABEL: define ptr @f5() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr (i8, ptr @g1, i64 2)
;
ret ptr getelementptr (i8, ptr @g1, i64 2)
}

; add constant expression with the nuw flag. The expected output keeps this
; body distinct from @f7, which is the same add without nuw.
define i64 @f6() unnamed_addr {
; CHECK-LABEL: define i64 @f6() unnamed_addr {
; CHECK-NEXT: ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1)
;
ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1)
}

; Plain add constant expression (no wrap flags). Baseline for @f6 (adds nuw)
; and @f8 (uses sub instead of add).
define i64 @f7() unnamed_addr {
; CHECK-LABEL: define i64 @f7() unnamed_addr {
; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1)
;
ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1)
}

; sub constant expression with the same operands as the add in @f7. The
; expected output keeps this body, so the two differ only in the opcode.
define i64 @f8() unnamed_addr {
; CHECK-LABEL: define i64 @f8() unnamed_addr {
; CHECK-NEXT: ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1)
;
ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1)
}

; GEP constant expression with inrange on the second index. The expected
; output keeps this body distinct from @f11 (no inrange) and @f12 (inrange on
; the first index).
define ptr @f10() unnamed_addr {
; CHECK-LABEL: define ptr @f10() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1)
;
ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1)
}

; GEP constant expression without any inrange index. Baseline for @f10 and
; @f12, which mark the second and first index inrange respectively.
define ptr @f11() unnamed_addr {
; CHECK-LABEL: define ptr @f11() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1)
;
ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1)
}

; GEP constant expression with inrange on the first index. The expected
; output keeps this body distinct from @f10 (inrange on the second index) and
; @f11 (no inrange).
define ptr @f12() unnamed_addr {
; CHECK-LABEL: define ptr @f12() unnamed_addr {
; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1)
;
ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1)
}
Loading