From 42cfe16e66840136fed555891412bd2443542b87 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 11 Sep 2023 14:04:24 +0100 Subject: [PATCH] [TwoAddressInstruction] Use isPlainlyKilled in processTiedPairs Calling isPlainlyKilled instead of directly checking for a kill flag should make processTiedPairs behave the same with LiveIntervals (i.e. when compiling with -early-live-intervals) as it does with LiveVariables. --- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 4 +- llvm/test/CodeGen/SystemZ/rot-02.ll | 24 ++---- llvm/test/CodeGen/X86/combine-or.ll | 3 +- llvm/test/CodeGen/X86/combine-rotates.ll | 78 +++++++------------ .../statepoint-cmp-sunk-past-statepoint.ll | 3 +- 5 files changed, 39 insertions(+), 73 deletions(-) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 560a0a4fbac66..4ae396723ef09 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1566,7 +1566,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, MachineOperand &MO = MI->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { + if (isPlainlyKilled(MO)) { MO.setIsKill(false); RemovedKillFlag = true; } @@ -1587,7 +1587,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, for (MachineOperand &MO : MI->all_uses()) { if (MO.getReg() == RegB) { if (MO.getSubReg() == SubRegB && !IsEarlyClobber) { - if (MO.isKill()) { + if (isPlainlyKilled(MO)) { MO.setIsKill(false); RemovedKillFlag = true; } diff --git a/llvm/test/CodeGen/SystemZ/rot-02.ll b/llvm/test/CodeGen/SystemZ/rot-02.ll index 84fac6af5fcaa..aa9d841703552 100644 --- a/llvm/test/CodeGen/SystemZ/rot-02.ll +++ b/llvm/test/CodeGen/SystemZ/rot-02.ll @@ -2,8 +2,8 @@ ; Test removal of AND operations that don't affect last 6 bits of rotate amount ; operand. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefixes=CHECK,CHECK-LV -; RUN: llc < %s -mtriple=s390x-linux-gnu -early-live-intervals | FileCheck %s -check-prefixes=CHECK,CHECK-LIS +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -early-live-intervals | FileCheck %s ; Test that AND is not removed when some lower 5 bits are not set. define i32 @f1(i32 %val, i32 %amt) { @@ -76,20 +76,12 @@ define i64 @f4(i64 %val, i64 %amt) { ; Test that AND is not entirely removed if the result is reused. define i32 @f5(i32 %val, i32 %amt) { -; CHECK-LV-LABEL: f5: -; CHECK-LV: # %bb.0: -; CHECK-LV-NEXT: rll %r2, %r2, 0(%r3) -; CHECK-LV-NEXT: nilf %r3, 63 -; CHECK-LV-NEXT: ar %r2, %r3 -; CHECK-LV-NEXT: br %r14 -; -; CHECK-LIS-LABEL: f5: -; CHECK-LIS: # %bb.0: -; CHECK-LIS-NEXT: rll %r0, %r2, 0(%r3) -; CHECK-LIS-NEXT: nilf %r3, 63 -; CHECK-LIS-NEXT: ar %r3, %r0 -; CHECK-LIS-NEXT: lr %r2, %r3 -; CHECK-LIS-NEXT: br %r14 +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: rll %r2, %r2, 0(%r3) +; CHECK-NEXT: nilf %r3, 63 +; CHECK-NEXT: ar %r2, %r3 +; CHECK-NEXT: br %r14 %and = and i32 %amt, 63 %inv = sub i32 32, %and diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index bfb9885c10c4e..460251ffa6c5d 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -253,7 +253,8 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LIS-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7] ; CHECK-LIS-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,1,1] ; CHECK-LIS-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7] -; CHECK-LIS-NEXT: por %xmm2, %xmm0 +; CHECK-LIS-NEXT: por %xmm0, %xmm2 +; CHECK-LIS-NEXT: movdqa %xmm2, %xmm0 ; CHECK-LIS-NEXT: retq %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32> diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll index 8e43ae438f2ae..65d74c8f262a3 100644 --- a/llvm/test/CodeGen/X86/combine-rotates.ll +++ b/llvm/test/CodeGen/X86/combine-rotates.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2,SSE2-LV -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -early-live-intervals | FileCheck %s --check-prefixes=CHECK,SSE2,SSE2-LIS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -early-live-intervals | FileCheck %s --check-prefixes=CHECK,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=CHECK,XOP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 @@ -115,56 +115,30 @@ define i32 @combine_rot_select_zero(i32, i32) { } define <4 x i32> @combine_vec_rot_select_zero(<4 x i32>, <4 x i32>) { -; SSE2-LV-LABEL: combine_vec_rot_select_zero: -; SSE2-LV: # %bb.0: -; SSE2-LV-NEXT: pxor %xmm2, %xmm2 -; SSE2-LV-NEXT: pcmpeqd %xmm1, %xmm2 -; SSE2-LV-NEXT: pslld $23, %xmm1 -; SSE2-LV-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-LV-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-LV-NEXT: cvttps2dq %xmm1, %xmm1 -; SSE2-LV-NEXT: movdqa %xmm0, %xmm3 -; SSE2-LV-NEXT: pmuludq %xmm1, %xmm3 -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-LV-NEXT: pmuludq %xmm5, %xmm1 -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] -; SSE2-LV-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE2-LV-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSE2-LV-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; SSE2-LV-NEXT: por %xmm4, %xmm3 -; SSE2-LV-NEXT: pand %xmm2, %xmm0 -; SSE2-LV-NEXT: pandn %xmm3, %xmm2 -; SSE2-LV-NEXT: por %xmm2, %xmm0 -; SSE2-LV-NEXT: retq -; -; SSE2-LIS-LABEL: combine_vec_rot_select_zero: -; SSE2-LIS: # %bb.0: -; SSE2-LIS-NEXT: movdqa %xmm0, %xmm2 -; SSE2-LIS-NEXT: pxor %xmm0, %xmm0 -; SSE2-LIS-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-LIS-NEXT: pslld $23, %xmm1 -; SSE2-LIS-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-LIS-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-LIS-NEXT: cvttps2dq %xmm1, %xmm1 -; SSE2-LIS-NEXT: movdqa %xmm2, %xmm3 -; SSE2-LIS-NEXT: pmuludq %xmm1, %xmm3 -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3] -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-LIS-NEXT: pmuludq %xmm5, %xmm1 -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] -; SSE2-LIS-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; SSE2-LIS-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSE2-LIS-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; SSE2-LIS-NEXT: por %xmm4, %xmm3 -; SSE2-LIS-NEXT: pand %xmm0, %xmm2 -; SSE2-LIS-NEXT: pandn %xmm3, %xmm0 -; SSE2-LIS-NEXT: por %xmm2, %xmm0 -; SSE2-LIS-NEXT: retq +; SSE2-LABEL: combine_vec_rot_select_zero: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pslld $23, %xmm1 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pmuludq %xmm1, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pmuludq %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm2 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: retq ; ; XOP-LABEL: combine_vec_rot_select_zero: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll index 63b2a9d415041..731cc95114f77 100644 --- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll +++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll @@ -61,8 +61,7 @@ zero: ; CHECK: bb.4 ; CHECK: bb.5 ; CHECK: %3:gr64 = COPY %10 -; CHECK-LV: %4:gr64 = COPY killed %10 -; CHECK-LIS: %4:gr64 = COPY %10 +; CHECK: %4:gr64 = COPY killed %10 ; CHECK: %4:gr64 = nuw ADD64ri32 %4, 8, implicit-def dead $eflags ; CHECK: TEST64rr killed %1, %1, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 5, implicit killed $eflags