Patch summary (free-form preamble; text before the first "diff --git" line is
skipped by `git apply` and `patch`):

  * SelectionDAG::computeOverflowForSignedSub and
    SelectionDAG::computeOverflowForUnsignedSub no longer fall back to
    OFK_Sometime. Each now calls computeKnownBits on both operands, builds
    ConstantRanges via ConstantRange::fromKnownBits (signed = true / false
    respectively) and maps the result of signedSubMayOverflow /
    unsignedSubMayOverflow through mapOverflowResult.
  * combine-subo.ll gains i8 / v4i8 intrinsic declarations and five new tests.
  * or-with-overflow.ll: updated X86 CHECK lines for or_i64_ri.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Line breaks and blank lines were re-derived from the hunk header
counts. The constant-vector operands of the calls in always_usub_const_vector
and never_usub_const_vector were missing from that copy; the values below were
chosen to reproduce the CHECK lines (all lanes overflow to 255 / result
<127,255,0,254> with no overflow) -- confirm against the upstream test before
committing. Leading indentation and CHECK-line column padding were likewise
restored by convention; apply with --ignore-whitespace if a context line does
not match.

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cd21af770e1a4..0a61920b7c079 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4091,8 +4091,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
   if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
     return OFK_Never;
 
-  // TODO: Add ConstantRange::signedSubMayOverflow handling.
-  return OFK_Sometime;
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+  return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
 }
 
 SelectionDAG::OverflowKind
@@ -4101,8 +4104,11 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
   if (isNullConstant(N1))
     return OFK_Never;
 
-  // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
-  return OFK_Sometime;
+  KnownBits N0Known = computeKnownBits(N0);
+  KnownBits N1Known = computeKnownBits(N1);
+  ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+  ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+  return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
 }
 
 SelectionDAG::OverflowKind
diff --git a/llvm/test/CodeGen/X86/combine-subo.ll b/llvm/test/CodeGen/X86/combine-subo.ll
index 6965f6d7af27b..99f26525d49e5 100644
--- a/llvm/test/CodeGen/X86/combine-subo.ll
+++ b/llvm/test/CodeGen/X86/combine-subo.ll
@@ -4,9 +4,14 @@
 
 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
 declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
 
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
+
 declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
+declare { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> , <4 x i8>) nounwind readnone
 
 ; fold (ssub x, 0) -> x
 define i32 @combine_ssub_zero(i32 %a0, i32 %a1) {
@@ -148,3 +153,79 @@ define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
   %4 = select <4 x i1> %3, <4 x i32> %a1, <4 x i32> %2
   ret <4 x i32> %4
 }
+
+define { i32, i1 } @combine_usub_nuw(i32 %a, i32 %b) {
+; CHECK-LABEL: combine_usub_nuw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    andl $2147483647, %esi # imm = 0x7FFFFFFF
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    retq
+  %aa = or i32 %a, 2147483648
+  %bb = and i32 %b, 2147483647
+  %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb)
+  ret { i32, i1 } %x
+}
+
+define { i8, i1 } @usub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: usub_always_overflow:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orb $64, %dil
+; CHECK-NEXT:    movb $63, %al
+; CHECK-NEXT:    subb %dil, %al
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    retq
+  %y = or i8 %x, 64
+  %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y)
+  ret { i8, i1 } %a
+}
+
+define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: ssub_always_overflow:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpb $30, %dil
+; CHECK-NEXT:    movl $29, %ecx
+; CHECK-NEXT:    cmovgel %edi, %ecx
+; CHECK-NEXT:    movb $-100, %al
+; CHECK-NEXT:    subb %cl, %al
+; CHECK-NEXT:    seto %dl
+; CHECK-NEXT:    retq
+  %c = icmp sgt i8 %x, 29
+  %y = select i1 %c, i8 %x, i8 29
+  %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y)
+  ret { i8, i1 } %a
+}
+
+define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
+; SSE-LABEL: always_usub_const_vector:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: always_usub_const_vector:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    retq
+  %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
+  ret { <4 x i8>, <4 x i1> } %x
+}
+
+define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind {
+; SSE-LABEL: never_usub_const_vector:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = <127,255,0,254,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: never_usub_const_vector:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [127,255,0,254,127,255,0,254,127,255,0,254,127,255,0,254]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    retq
+  %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>)
+  ret { <4 x i8>, <4 x i1> } %x
+}
diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index 4440485af54bb..b3ffa209bc700 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -161,19 +161,13 @@ define i32 @or_i32_rr(i32 %0, i32 %1) {
 define i64 @or_i64_ri(i64 %0, i64 %1) nounwind {
 ; X86-LABEL: or_i64_ri:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    orl $17, %ecx
-; X86-NEXT:    cmpl $1, %ecx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    sbbl $0, %esi
-; X86-NEXT:    jl .LBB6_2
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    js .LBB6_2
 ; X86-NEXT:  # %bb.1:
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    orl $17, %eax
 ; X86-NEXT:  .LBB6_2:
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: or_i64_ri: