From 57631cbf386bd98e9d2932bce7242a96e6810a8e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 24 Nov 2023 20:27:43 +0000 Subject: [PATCH 1/5] [Thumb] Add test case where the machine-outliner clobbers LR. Add ad test case where `bl OUTLINED_FUNCTION_0` clobbers LR, which in turn is used the later call to memcpy to return to the caller. (cherry picked from commit 20f634f275b431ff256ba45cbcbb6dc5bd945fb3) --- .../outlined-fn-may-clobber-lr-in-caller.ll | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll diff --git a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll new file mode 100644 index 0000000000000..d81d008b44bed --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=thumbv7m-none-none-eabi < %s | FileCheck %s + +target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +%struct.wibble = type { [30 x i8], i8, i32 } +%struct.eggs = type { i32, [30 x i8], i8, i8, i8, [3 x i8] } + +@global = external global [3 x %struct.wibble], align 4 +@global.1 = external global [3 x %struct.wibble], align 4 + +; Test case to make sure calling an outlined function does not clobber LR used +; by a tail call in caller. +; FIXME: Currently bl OUTLINED_FUNCTION_0 clobbers LR, which in turn is used +; by the later call to memcpy to return to the caller. +define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 noundef zeroext %arg2) unnamed_addr #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: cmp r1, #2 +; CHECK-NEXT: beq .LBB0_3 +; CHECK-NEXT: @ %bb.1: @ %bb +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: bne .LBB0_5 +; CHECK-NEXT: @ %bb.2: @ %bb4 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldr r2, .LCPI0_1 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: @ %bb14 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldr r2, .LCPI0_0 +; CHECK-NEXT: .LBB0_4: @ %bb4 +; CHECK-NEXT: add.w r1, r2, r1, lsl #2 +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: movs r2, #30 +; CHECK-NEXT: b __aeabi_memcpy +; CHECK-NEXT: .LBB0_5: @ %bb24 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: bl wombat +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.6: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long global.1 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long global +bb: + %gep = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 4 + %zext = zext i8 %arg2 to i32 + switch i32 %arg1, label %bb24 [ + i32 1, label %bb4 + i32 2, label %bb14 + ] + +bb4: ; preds = %bb3 + store i8 1, ptr %gep, align 4, !tbaa !6 + %gep5 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext + %gep6 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 2 + %load = load i32, ptr %gep6, align 4, !tbaa !11 + %gep7 = getelementptr inbounds [3 x %struct.wibble], ptr @global, i32 0, i32 %zext, i32 1 + %load8 = load i8, ptr %gep7, align 2, !tbaa !13 + %gep9 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3 + %gep10 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2 + store i8 30, ptr %gep10, align 2, !tbaa !16 + %gep11 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1 + tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep11, ptr noundef nonnull align 4 dereferenceable(30) %gep5, i32 30, i1 false) + br label %bb26 + +bb14: ; preds = %bb12 + store i8 1, ptr %gep, align 4, !tbaa !6 + %gep16 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext + %gep17 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 2 + %load18 = load i32, ptr %gep17, align 4, !tbaa !21 + %gep19 = getelementptr inbounds [3 x %struct.wibble], ptr @global.1, i32 0, i32 %zext, i32 1 + %load20 = load i8, ptr %gep19, align 2, !tbaa !23 + %gep21 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 3 + %gep22 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 2 + store i8 30, ptr %gep22, align 2, !tbaa !16 + %gep23 = getelementptr inbounds %struct.eggs, ptr %arg, i32 0, i32 1 + tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(30) %gep23, ptr noundef nonnull align 4 dereferenceable(30) %gep16, i32 30, i1 false) + br label %bb26 + +bb24: ; preds = %bb + tail call void @wombat() + tail call void asm sideeffect "", ""() + br label %bb26 + +bb26: ; preds = %bb24, %bb14, %bb12, %bb4, %bb3 + ret void +} + +declare void @wombat() + +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #2 + +attributes #0 = { minsize noimplicitfloat nounwind optsize } + +!6 = !{!7, !9, i64 36} +!7 = !{!"", !8, i64 0, !9, i64 4, !9, i64 34, !9, i64 35, !9, i64 36, !9, i64 37} +!8 = !{!"long", !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!12, !8, i64 32} +!12 = !{!"B", !9, i64 0, !9, i64 30, !8, i64 32} +!13 = !{!12, !9, i64 30} +!14 = !{!7, !8, i64 0} +!15 = !{!7, !9, i64 35} +!16 = !{!7, !9, i64 34} +!21 = !{!22, !8, i64 32} +!22 = !{!"A", !9, i64 0, !9, i64 30, !8, i64 32} +!23 = !{!22, !9, i64 30} From 2409b7624bfd50a3a10ab425514d1d694973416f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 20 Dec 2023 16:56:15 +0100 Subject: [PATCH 2/5] [ARM] Check all terms in emitPopInst when clearing Restored for LR. (#75527) emitPopInst checks a single function exit MBB. If other paths also exit the function and any of there terminators uses LR implicitly, it is not save to clear the Restored bit. Check all terminators for the function before clearing Restored. This fixes a mis-compile in outlined-fn-may-clobber-lr-in-caller.ll where the machine-outliner previously introduced BLs that clobbered LR which in turn is used by the tail call return. Alternative to #73553 (cherry picked from commit b1a5ee1febd8a903cec3dfdad61d57900dc3823e) --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 30 +++++++++++++++++-- llvm/lib/Target/ARM/ARMFrameLowering.h | 3 ++ .../outlined-fn-may-clobber-lr-in-caller.ll | 14 ++++++--- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 4496d4928ebe9..650f4650eef05 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1645,9 +1645,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, // Fold the return instruction into the LDM. DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; - // We 'restore' LR into PC so it is not live out of the return block: - // Clear Restored bit. - Info.setRestored(false); } // If NoGap is true, pop consecutive registers and then leave the rest @@ -2769,6 +2766,33 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); } +void ARMFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + // Check if all terminators do not implicitly use LR. Then we can 'restore' LR + // into PC so it is not live out of the return block: Clear the Restored bit + // in that case. + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() != ARM::LR) + continue; + if (all_of(MF, [](const MachineBasicBlock &MBB) { + return all_of(MBB.terminators(), [](const MachineInstr &Term) { + return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET || + Term.getOpcode() == ARM::t2LDMIA_RET || + Term.getOpcode() == ARM::tPOP_RET; + }); + })) { + Info.setRestored(false); + break; + } + } +} + void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const { TargetFrameLowering::getCalleeSaves(MF, SavedRegs); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 16f2ce6bea6f1..8d2b8beb9a58f 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -59,6 +59,9 @@ class ARMFrameLowering : public TargetFrameLowering { void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll index d81d008b44bed..1dbb21f40a761 100644 --- a/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll +++ b/llvm/test/CodeGen/Thumb2/outlined-fn-may-clobber-lr-in-caller.ll @@ -11,8 +11,6 @@ target datalayout = "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" ; Test case to make sure calling an outlined function does not clobber LR used ; by a tail call in caller. -; FIXME: Currently bl OUTLINED_FUNCTION_0 clobbers LR, which in turn is used -; by the later call to memcpy to return to the caller. define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 noundef zeroext %arg2) unnamed_addr #0 { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %bb @@ -22,11 +20,19 @@ define void @test(ptr nocapture noundef writeonly %arg, i32 noundef %arg1, i8 no ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: bne .LBB0_5 ; CHECK-NEXT: @ %bb.2: @ %bb4 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 ; CHECK-NEXT: ldr r2, .LCPI0_1 ; CHECK-NEXT: b .LBB0_4 ; CHECK-NEXT: .LBB0_3: @ %bb14 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: strb.w r1, [r0, #36] +; CHECK-NEXT: movs r1, #30 +; CHECK-NEXT: strb.w r1, [r0, #34] +; CHECK-NEXT: add.w r1, r2, r2, lsl #3 ; CHECK-NEXT: ldr r2, .LCPI0_0 ; CHECK-NEXT: .LBB0_4: @ %bb4 ; CHECK-NEXT: add.w r1, r2, r1, lsl #2 From add00bcbad74c6191362b571c3a5f2fb9224f711 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 8 Jan 2024 10:21:25 +0100 Subject: [PATCH 3/5] [MSSA] Don't require clone creation to succeed (#76819) Sometimes, we create a MemoryAccess for an instruction, which is later simplified (e.g. via devirtualization) such that the new instruction has no memory effects anymore. If we later clone the instruction (e.g. during unswitching), then MSSA will not create a MemoryAccess for the new instruction, triggering an assert. Disable the assertion (by passing CreationMustSucceed=false) and adjust getDefiningAccessForClone() to work correctly in that case. This PR implements the alternative suggestion by alinas from https://github.com/llvm/llvm-project/pull/76142. (cherry picked from commit d02c7931d1be794a230943e300fec4172032e6a8) --- llvm/lib/Analysis/MemorySSAUpdater.cpp | 17 +-- .../memssa-readnone-access.ll | 117 ++++++++++++++++++ 2 files changed, 121 insertions(+), 13 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 9ad60f774e9f8..e87ae7d71fffe 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -568,7 +568,6 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, - bool CloneWasSimplified, MemorySSA *MSSA) { MemoryAccess *InsnDefining = MA; if (MemoryDef *DefMUD = dyn_cast(InsnDefining)) { @@ -578,18 +577,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, if (Instruction *NewDefMUDI = cast_or_null(VMap.lookup(DefMUDI))) { InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - if (!CloneWasSimplified) - assert(InsnDefining && "Defining instruction cannot be nullptr."); - else if (!InsnDefining || isa(InsnDefining)) { + if (!InsnDefining || isa(InsnDefining)) { // The clone was simplified, it's no longer a MemoryDef, look up. - auto DefIt = DefMUD->getDefsIterator(); - // Since simplified clones only occur in single block cloning, a - // previous definition must exist, otherwise NewDefMUDI would not - // have been found in VMap. - assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && - "Previous def must exist"); InsnDefining = getNewDefiningAccessForClone( - &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA); } } } @@ -624,9 +615,9 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( NewInsn, getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, - MPhiMap, CloneWasSimplified, MSSA), + MPhiMap, MSSA), /*Template=*/CloneWasSimplified ? nullptr : MUD, - /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + /*CreationMustSucceed=*/false); if (NewUseOrDef) MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll new file mode 100644 index 0000000000000..2aaf777683e11 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/memssa-readnone-access.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes="loop-mssa(loop-instsimplify,simple-loop-unswitch)" < %s | FileCheck %s + +@vtable = constant ptr @foo + +declare void @foo() memory(none) +declare void @bar() + +; The call becomes known readnone after simplification, but still have a +; MemoryAccess. Make sure this does not lead to an assertion failure. +define void @test(i1 %c) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + br i1 %c, label %exit, label %loop + +exit: + ret void +} + +; Variant with another access after the call. +define void @test2(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test2( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + call void %fn() + call void @bar() + br i1 %c, label %exit, label %loop + +exit: + ret void +} + +; Variant with another access after the call and no access before the call. +define void @test3(i1 %c, ptr %p) { +; CHECK-LABEL: define void @test3( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]] +; CHECK-NEXT: br i1 [[C_FR]], label [[DOTSPLIT_US:%.*]], label [[DOTSPLIT:%.*]] +; CHECK: .split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: br label [[SPLIT_US:%.*]] +; CHECK: split.us: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[EXIT_SPLIT_US:%.*]] +; CHECK: exit.split.us: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[SPLIT:%.*]] +; CHECK: split: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %fn = load ptr, ptr @vtable, align 8 + br label %split + +split: + call void %fn() + call void @bar() + br i1 %c, label %exit, label %loop + +exit: + ret void +} From 919a630125a00edbc7ebbcd7a681fa50f4c8df4c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 20 Dec 2023 14:41:52 +0100 Subject: [PATCH 4/5] [MergeFunc] Add tests for incorrect const expr merging (NFC) (cherry picked from commit 1ff9fb78c8bb87ffa8700523cd77e4e2bf000740) (cherry picked from commit 3dd2db08a2b30618e21f165cf094de421dc32c00) (cherry picked from commit 836e71a4254cb7b61f9d1b59591e69bb4055eb45) --- llvm/test/Transforms/MergeFunc/constexpr.ll | 55 +++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 llvm/test/Transforms/MergeFunc/constexpr.ll diff --git a/llvm/test/Transforms/MergeFunc/constexpr.ll b/llvm/test/Transforms/MergeFunc/constexpr.ll new file mode 100644 index 0000000000000..7a4823b5c494a --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/constexpr.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt -S -passes=mergefunc -mergefunc-use-aliases < %s | FileCheck %s + +@g1 = external unnamed_addr global i8 +@g2 = external unnamed_addr global i8 + +;. +; CHECK: @g1 = external unnamed_addr global i8 +; CHECK: @g2 = external unnamed_addr global i8 +; CHECK: @f2 = unnamed_addr alias i1 (), ptr @f1 +; CHECK: @f4 = unnamed_addr alias ptr (), ptr @f3 +; CHECK: @f5 = unnamed_addr alias ptr (), ptr @f3 +; CHECK: @f7 = unnamed_addr alias i64 (), ptr @f6 +; CHECK: @f8 = unnamed_addr alias i64 (), ptr @f6 +;. +define i1 @f1() unnamed_addr { +; CHECK-LABEL: define i1 @f1() unnamed_addr { +; CHECK-NEXT: ret i1 icmp eq (ptr @g1, ptr @g2) +; + ret i1 icmp eq (ptr @g1, ptr @g2) +} + +define i1 @f2() unnamed_addr { + ret i1 icmp ne (ptr @g1, ptr @g2) +} + +define ptr @f3() unnamed_addr { +; CHECK-LABEL: define ptr @f3() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr inbounds (i8, ptr @g1, i64 2) +; + ret ptr getelementptr inbounds (i8, ptr @g1, i64 2) +} + +define ptr @f4() unnamed_addr { + ret ptr getelementptr (i16, ptr @g1, i64 2) +} + +define ptr @f5() unnamed_addr { + ret ptr getelementptr (i8, ptr @g1, i64 2) +} + +define i64 @f6() unnamed_addr { +; CHECK-LABEL: define i64 @f6() unnamed_addr { +; CHECK-NEXT: ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1) +; + ret i64 add nuw (i64 ptrtoint (ptr @g1 to i64), i64 1) +} + +define i64 @f7() unnamed_addr { + ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1) +} + +define i64 @f8() unnamed_addr { + ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1) +} From 700fbf978e6c5bb297d12963206f7487722de480 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 20 Dec 2023 15:05:42 +0100 Subject: [PATCH 5/5] [MergeFunc] Fix comparison of constant expressions Functions using different constant expressions were incorrectly merged, because a lot of state was missing from the comparison, including the opcode, the comparison predicate, the GEP element type, as well as the inbounds, inrange and nowrap poison flags. (cherry picked from commit 8b8f2ef06e341ef634f85fa01800f4e441cacd91) --- .../Transforms/Utils/FunctionComparator.cpp | 25 +++++++++++ llvm/test/Transforms/MergeFunc/constexpr.ll | 41 ++++++++++++++++--- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 8daeb92130ba3..c6f3d4a5c71b9 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -392,6 +392,8 @@ int FunctionComparator::cmpConstants(const Constant *L, case Value::ConstantExprVal: { const ConstantExpr *LE = cast(L); const ConstantExpr *RE = cast(R); + if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode())) + return Res; unsigned NumOperandsL = LE->getNumOperands(); unsigned NumOperandsR = RE->getNumOperands(); if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) @@ -401,6 +403,29 @@ int FunctionComparator::cmpConstants(const Constant *L, cast(RE->getOperand(i)))) return Res; } + if (LE->isCompare()) + if (int Res = cmpNumbers(LE->getPredicate(), RE->getPredicate())) + return Res; + if (auto *GEPL = dyn_cast(LE)) { + auto *GEPR = cast(RE); + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) + return Res; + if (int Res = cmpNumbers(GEPL->isInBounds(), GEPR->isInBounds())) + return Res; + if (int Res = cmpNumbers(GEPL->getInRangeIndex().value_or(unsigned(-1)), + GEPR->getInRangeIndex().value_or(unsigned(-1)))) + return Res; + } + if (auto *OBOL = dyn_cast(LE)) { + auto *OBOR = cast(RE); + if (int Res = + cmpNumbers(OBOL->hasNoUnsignedWrap(), OBOR->hasNoUnsignedWrap())) + return Res; + if (int Res = + cmpNumbers(OBOL->hasNoSignedWrap(), OBOR->hasNoSignedWrap())) + return Res; + } return 0; } case Value::BlockAddressVal: { diff --git a/llvm/test/Transforms/MergeFunc/constexpr.ll b/llvm/test/Transforms/MergeFunc/constexpr.ll index 7a4823b5c494a..9fb7806017422 100644 --- a/llvm/test/Transforms/MergeFunc/constexpr.ll +++ b/llvm/test/Transforms/MergeFunc/constexpr.ll @@ -7,11 +7,6 @@ ;. ; CHECK: @g1 = external unnamed_addr global i8 ; CHECK: @g2 = external unnamed_addr global i8 -; CHECK: @f2 = unnamed_addr alias i1 (), ptr @f1 -; CHECK: @f4 = unnamed_addr alias ptr (), ptr @f3 -; CHECK: @f5 = unnamed_addr alias ptr (), ptr @f3 -; CHECK: @f7 = unnamed_addr alias i64 (), ptr @f6 -; CHECK: @f8 = unnamed_addr alias i64 (), ptr @f6 ;. define i1 @f1() unnamed_addr { ; CHECK-LABEL: define i1 @f1() unnamed_addr { @@ -21,6 +16,9 @@ define i1 @f1() unnamed_addr { } define i1 @f2() unnamed_addr { +; CHECK-LABEL: define i1 @f2() unnamed_addr { +; CHECK-NEXT: ret i1 icmp ne (ptr @g1, ptr @g2) +; ret i1 icmp ne (ptr @g1, ptr @g2) } @@ -32,10 +30,16 @@ define ptr @f3() unnamed_addr { } define ptr @f4() unnamed_addr { +; CHECK-LABEL: define ptr @f4() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr (i16, ptr @g1, i64 2) +; ret ptr getelementptr (i16, ptr @g1, i64 2) } define ptr @f5() unnamed_addr { +; CHECK-LABEL: define ptr @f5() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr (i8, ptr @g1, i64 2) +; ret ptr getelementptr (i8, ptr @g1, i64 2) } @@ -47,9 +51,36 @@ define i64 @f6() unnamed_addr { } define i64 @f7() unnamed_addr { +; CHECK-LABEL: define i64 @f7() unnamed_addr { +; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1) +; ret i64 add (i64 ptrtoint (ptr @g1 to i64), i64 1) } define i64 @f8() unnamed_addr { +; CHECK-LABEL: define i64 @f8() unnamed_addr { +; CHECK-NEXT: ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1) +; ret i64 sub (i64 ptrtoint (ptr @g1 to i64), i64 1) } + +define ptr @f10() unnamed_addr { +; CHECK-LABEL: define ptr @f10() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, inrange i64 1) +} + +define ptr @f11() unnamed_addr { +; CHECK-LABEL: define ptr @f11() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, i64 0, i64 1) +} + +define ptr @f12() unnamed_addr { +; CHECK-LABEL: define ptr @f12() unnamed_addr { +; CHECK-NEXT: ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1) +; + ret ptr getelementptr ([4 x i32], ptr @g1, inrange i64 0, i64 1) +}