diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 9ad60f774e9f8..e87ae7d71fffe 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -568,7 +568,6 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, - bool CloneWasSimplified, MemorySSA *MSSA) { MemoryAccess *InsnDefining = MA; if (MemoryDef *DefMUD = dyn_cast(InsnDefining)) { @@ -578,18 +577,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, if (Instruction *NewDefMUDI = cast_or_null(VMap.lookup(DefMUDI))) { InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - if (!CloneWasSimplified) - assert(InsnDefining && "Defining instruction cannot be nullptr."); - else if (!InsnDefining || isa(InsnDefining)) { + if (!InsnDefining || isa(InsnDefining)) { // The clone was simplified, it's no longer a MemoryDef, look up. - auto DefIt = DefMUD->getDefsIterator(); - // Since simplified clones only occur in single block cloning, a - // previous definition must exist, otherwise NewDefMUDI would not - // have been found in VMap. - assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && - "Previous def must exist"); InsnDefining = getNewDefiningAccessForClone( - &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA); } } } @@ -624,9 +615,9 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( NewInsn, getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, - MPhiMap, CloneWasSimplified, MSSA), + MPhiMap, MSSA), /*Template=*/CloneWasSimplified ? nullptr : MUD, - /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + /*CreationMustSucceed=*/false); if (NewUseOrDef) MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 4496d4928ebe9..650f4650eef05 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // We 'restore' LR into PC so it is not live out of the return block: - // Clear Restored bit. - Info.setRestored(false); } // If NoGap is true, pop consecutive registers and then leave the rest @@ -2769,6 +2766,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + // Check if all terminators do not implicitly use LR. Then we can 'restore' LR + // into PC so it is not live out of the return block: Clear the Restored bit + // in that case. + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() != ARM::LR) + continue; + if (all_of(MF, [](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [](const MachineInstr &Term) { + return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) { + Info.setRestored(false); + break; + } + } +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 16f2ce6bea6f1..8d2b8beb9a58f 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -59,6 +59,9 @@ class ARMFrameLowering : public TargetFrameLowering { void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 8daeb92130ba3..c6f3d4a5c71b9 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -392,6 +392,8 @@ int FunctionComparator::cmpConstants(const Constant *L, case Value::ConstantExprVal: { const ConstantExpr *LE = cast(L); const ConstantExpr *RE = cast(R); + if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode())) + return Res; unsigned NumOperandsL = LE->getNumOperands(); unsigned NumOperandsR = RE->getNumOperands(); if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) @@ -401,6 +403,29 @@ int FunctionComparator::cmpConstants(const Constant *L, cast(RE->getOperand(i)))) return Res; } + if (LE->isCompare()) + if (int Res = cmpNumbers(LE->getPredicate(), RE->getPredicate())) + return Res; + if (auto *GEPL = dyn_cast(LE)) { + auto *GEPR = cast(RE); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + if (int Res = cmpNumbers(GEPL->isInBounds(), GEPR->isInBounds())) + return Res; + if (int Res = cmpNumbers(GEPL->getInRangeIndex().value_or(unsigned(-1)), + GEPR->getInRangeIndex().value_or(unsigned(-1)))) + return Res; + } + if (auto *OBOL = dyn_cast(LE)) { + auto *OBOR = cast(RE); + if (int Res = + cmpNumbers(OBOL->hasNoUnsignedWrap(), OBOR->hasNoUnsignedWrap())) + return Res; + if (int Res = + cmpNumbers(OBOL->hasNoSignedWrap(), OBOR->hasNoSignedWrap())) + return Res; + } return 0; } case Value::BlockAddressVal: { diff --git a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll new file mode 100644 index 0000000000000..1dbb21f40a761 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=thumbv7m-none-none-eabi < %s | FileCheck %s + +target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +%struct.wibble = type { [30 x i8], i8, i32 } +%struct.eggs = type { i32, [30 x i8], i8, i8, i8, [3 x i8] } + +@global = external global [3 x %struct.wibble], align 4 +@global.1 = external global [3 x %struct.wibble], align 4 + +; Test case to make sure calling an outlined function does not clobber LR used +; by a tail call in caller. +define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 noundef zeroext %arg2) unnamed_addr #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: cmp r1, #2 +; CHECK-NEXT: beq .LBB0_3 +; CHECK-NEXT: @ %bb.1: @ %bb +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: bne .LBB0_5 +; CHECK-NEXT: @ %bb.2: @ %bb4 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 +; CHECK-NEXT: ldr r2, .LCPI0_1 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: @ %bb14 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 +; CHECK-NEXT: ldr r2, .LCPI0_0 +; CHECK-NEXT: .LBB0_4: @ %bb4 +; CHECK-NEXT: add.w r1, r2, r1, lsl #2 +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: movs r2, #30 +; CHECK-NEXT: b __aeabi_memcpy +; CHECK-NEXT: .LBB0_5: @ %bb24 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: bl wombat +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long global.1 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long global +bb: + %gep = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 4 + %zext = zext i8 %arg2 to i32 + switch i32 %arg1, label %bb24 [ + i32 1, label %bb4 + i32 2, label %bb14 + ] + +bb4: ; preds = %bb3 + store i8 1, ptr %gep, align 4, !tbaa !6 + %gep5 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext + %gep6 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 2 + %load = load i32, ptr %gep6, align 4, !tbaa !11 + %gep7 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 1 + %load8 = load i8, ptr %gep7, align 2, !tbaa !13 + %gep9 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3 + %gep10 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2 + store i8 30, ptr %gep10, align 2, !tbaa !16 + %gep11 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1 + tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep11, ptr noundef nonnull align 4 dereferenceable(30) %gep5, i32 30, i1 false) + br label %bb26 + +bb14: ; preds = %bb12 + store i8 1, ptr %gep, align 4, !tbaa !6 + %gep16 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext + %gep17 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 2 + %load18 = load i32, ptr %gep17, align 4, !tbaa !21 + %gep19 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 1 + %load20 = load i8, ptr %gep19, align 2, !tbaa !23 + %gep21 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3 + %gep22 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2 + store i8 30, ptr %gep22, align 2, !tbaa !16 + %gep23 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1 + tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep23, ptr noundef nonnull align 4 dereferenceable(30) %gep16, i32 30, i1 false) + br label %bb26 + +bb24: ; preds = %bb + tail call void @wombat() + tail call void asm sideeffect "", ""() + br label %bb26 + +bb26: ; preds = %bb24, %bb14, %bb12, %bb4, %bb3 + ret void +} + +declare void @wombat() + +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #2 + +attributes #0 = { minsize noimplicitfloat nounwind optsize } + +!6 = !{!7, !9, i64 36} +!7 = !{!"", !8, i64 0, !9, i64 4, !9, i64 34, !9, i64 35, !9, i64 36, !9, i64 37} +!8 = !{!"long", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!12, !8, i64 32} +!12 = !{!"B", !9, i64 0, !9, i64 30, !8, i64 32} +!13 = !{!12, !9, i64 30} +!14 = !{!7, !8, i64 0} +!15 = !{!7, !9, i64 35} +!16 = !{!7, !9, i64 34} +!21 = !{!22, !8, i64 32} +!22 = !{!"A", !9, i64 0, !9, i64 30, !8, i64 32} +!23 = !{!22, !9, i64 30} diff --git a/llvm/test/Transforms/MergeFunc/constexpr.ll b/llvm/test/Transforms/MergeFunc/constexpr.ll new file mode 100644 index 0000000000000..9fb7806017422 --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/constexpr.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt -S -passes=mergefunc -mergefunc-use-aliases < %s | FileCheck %s + +@g1 = external unnamed_addr global i8 +@g2 = external unnamed_addr global i8 + +;. +; CHECK: @g1 = external unnamed_addr global i8 +; CHECK: @g2 = external unnamed_addr global i8 +;. +define i1 @f1() unnamed_addr { +; CHECK-LABEL: define i1 @f1() unnamed_addr { +; CHECK-NEXT: ret i1 icmp eq (ptr @g1, ptr @g2) +; + ret i1 icmp eq (ptr @g1, ptr @g2) +} + +define i1 @f2() unnamed_addr { +; CHECK-LABEL: define i1 @f2() unnamed_addr { +; CHECK-NEXT: ret i1 icmp ne (ptr @g1, ptr @g2) +; + ret i1 icmp ne (ptr @g1, ptr @g2) +} + +define ptr @f3() unnamed_addr { +; CHECK-LABEL: define ptr @f3() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr @g1, i64 2) +; + ret ptr getelementptr inbounds (i8, ptr @g1, i64 2) +} + +define ptr @f4() unnamed_addr { +; CHECK-LABEL: define ptr @f4() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr (i16, ptr @g1, i64 2) +; + ret ptr getelementptr (i16, ptr @g1, i64 2) +} + +define ptr @f5() unnamed_addr { +; CHECK-LABEL: define ptr @f5() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr (i8, ptr @g1, i64 2) +; + ret ptr getelementptr (i8, ptr @g1, i64 2) +} + +define i64 @f6() unnamed_addr { +; CHECK-LABEL: define i64 @f6() unnamed_addr { +; CHECK-NEXT: ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1) +; + ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1) +} + +define i64 @f7() unnamed_addr { +; CHECK-LABEL: define i64 @f7() unnamed_addr { +; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1) +; + ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1) +} + +define i64 @f8() unnamed_addr { +; CHECK-LABEL: define i64 @f8() unnamed_addr { +; CHECK-NEXT: ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1) +; + ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1) +} + +define ptr @f10() unnamed_addr { +; CHECK-LABEL: define ptr @f10() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1) +} + +define ptr @f11() unnamed_addr { +; CHECK-LABEL: define ptr @f11() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1) +} + +define ptr @f12() unnamed_addr { +; CHECK-LABEL: define ptr @f12() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1) +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll new file mode 100644 index 0000000000000..2aaf777683e11 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes="loop-mssa(loop-instsimplify,simple-loop-unswitch)" < %s | FileCheck %s + +@vtable = constant ptr @foo + +declare void @foo() memory(none) +declare void @bar() + +; The call becomes known readnone after simplification, but still have a +; MemoryAccess. Make sure this does not lead to an assertion failure. +define void @test(i1 %c) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + br i1 %c, label %exit, label %loop + +exit: + ret void +} + +; Variant with another access after the call. +define void @test2(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + call void @bar() + br i1 %c, label %exit, label %loop + +exit: + ret void +} + +; Variant with another access after the call and no access before the call. +define void @test3(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test3( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: br label [[SPLIT_US:%.*]] +; CHECK: split.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[SPLIT:%.*]] +; CHECK: split: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + br label %split + +split: + call void %fn() + call void @bar() + br i1 %c, label %exit, label %loop + +exit: + ret void +}