diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a4357197e2843..5a80754e7a3bb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -701,6 +701,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Custom); + setOperationAction(ISD::FNEG, MVT::f16, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll index 3af8b1aec1feb..b276aac7e19f9 100644 --- a/llvm/test/CodeGen/X86/fp16-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll @@ -59,12 +59,10 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind { define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind { ; F16C-LABEL: test_half_copysign: ; F16C: # %bb.0: -; F16C-NEXT: vpextrw $0, %xmm1, %eax -; F16C-NEXT: andl $32768, %eax # imm = 0x8000 -; F16C-NEXT: vpextrw $0, %xmm0, %ecx -; F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF -; F16C-NEXT: orl %eax, %ecx -; F16C-NEXT: movw %cx, (%rdi) +; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-NEXT: vpor %xmm1, %xmm0, %xmm0 +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_copysign: @@ -76,23 +74,23 @@ define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind { ; ; X64-LABEL: test_half_copysign: ; X64: # %bb.0: -; X64-NEXT: pextrw $0, %xmm1, %eax -; X64-NEXT: andl $32768, %eax # imm = 0x8000 -; X64-NEXT: pextrw $0, %xmm0, %ecx -; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: movw %cx, (%rdi) +; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: por %xmm1, %xmm0 +; X64-NEXT: pextrw $0, %xmm0, %eax +; X64-NEXT: movw %ax, (%rdi) ; X64-NEXT: retq ; ; X86-LABEL: test_half_copysign: ; X86: # %bb.0: +; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: andl $32768, %ecx # imm = 0x8000 -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-NEXT: andl $32767, %edx # imm = 0x7FFF -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: movw %dx, (%eax) +; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: por %xmm1, %xmm0 +; X86-NEXT: pextrw $0, %xmm0, %ecx +; X86-NEXT: movw %cx, (%eax) ; X86-NEXT: retl %res = call half @llvm.copysign.half(half %a0, half %a1) store half %res, ptr %p0, align 2 @@ -334,9 +332,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind { define void @test_half_fabs(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_fabs: ; F16C: # %bb.0: -; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; @@ -349,14 +345,9 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind { ; ; X64-LABEL: test_half_fabs: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: callq __extendhfsf2@PLT ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-NEXT: callq __truncsfhf2@PLT ; X64-NEXT: pextrw $0, %xmm0, %eax -; X64-NEXT: movw %ax, (%rbx) -; X64-NEXT: popq %rbx +; X64-NEXT: movw %ax, (%rdi) ; X64-NEXT: retq ; ; X86-LABEL: test_half_fabs: @@ -514,9 +505,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind { define void @test_half_fneg(half %a0, ptr %p0) nounwind { ; F16C-LABEL: test_half_fneg: ; F16C: # %bb.0: -; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; F16C-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; @@ -529,14 +518,9 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind { ; ; X64-LABEL: test_half_fneg: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: callq __extendhfsf2@PLT ; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-NEXT: callq __truncsfhf2@PLT ; X64-NEXT: pextrw $0, %xmm0, %eax -; X64-NEXT: movw %ax, (%rbx) -; X64-NEXT: popq %rbx +; X64-NEXT: movw %ax, (%rdi) ; X64-NEXT: retq ; ; X86-LABEL: test_half_fneg: diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 2472e6e19c862..fb836cd2480a7 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -1041,7 +1041,6 @@ define void @main.158() #0 { ; CHECK-LIBCALL: # %bb.0: # %entry ; CHECK-LIBCALL-NEXT: pushq %rax ; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 -; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT ; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] ; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1 @@ -1059,10 +1058,10 @@ define void @main.158() #0 { ; BWON-F16C-LABEL: main.158: ; BWON-F16C: # %bb.0: # %entry ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1 -; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] -; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2 +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] +; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1 +; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: jae .LBB20_2 ; BWON-F16C-NEXT: # %bb.1: # %entry ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] @@ -1074,8 +1073,7 @@ define void @main.158() #0 { ; CHECK-I686-LABEL: main.158: ; CHECK-I686: # %bb.0: # %entry ; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: movl $0, (%esp) -; CHECK-I686-NEXT: calll __truncsfhf2 +; CHECK-I686-NEXT: pxor %xmm0, %xmm0 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax ; CHECK-I686-NEXT: movw %ax, (%esp) ; CHECK-I686-NEXT: calll __extendhfsf2 @@ -1192,32 +1190,25 @@ entry: define half @fcopysign(half %x, half %y) { ; CHECK-LIBCALL-LABEL: fcopysign: ; CHECK-LIBCALL: # %bb.0: -; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax -; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000 -; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx -; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF -; CHECK-LIBCALL-NEXT: orl %eax, %ecx -; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0 +; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-LIBCALL-NEXT: orps %xmm1, %xmm0 ; CHECK-LIBCALL-NEXT: retq ; ; BWON-F16C-LABEL: fcopysign: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax -; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000 -; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx -; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF -; BWON-F16C-NEXT: orl %eax, %ecx -; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 +; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; BWON-F16C-NEXT: vorps %xmm1, %xmm0, %xmm0 ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: fcopysign: ; CHECK-I686: # %bb.0: -; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000 -; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF -; CHECK-I686-NEXT: orl %eax, %ecx -; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0 +; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1 +; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; CHECK-I686-NEXT: por %xmm1, %xmm0 ; CHECK-I686-NEXT: retl %a = call half @llvm.copysign.f16(half %x, half %y) ret half %a