From 5417cc6a0665fe6dc8558805c41edc27c12f819d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 17 Aug 2023 11:55:38 +0200 Subject: [PATCH] [X86] Fix i128 argument passing under SysV ABI The x86_64 SysV ABI specifies that __int128 is passed either in two registers (if available) or in a 16 byte aligned stack slot. GCC implements this behavior. However, if only one free register is available, LLVM will instead pass one half of the i128 in a register, and the other on the stack. Make sure that either both are passed in registers or both on the stack. Fixes https://github.com/llvm/llvm-project/issues/41784. The patch is basically what craig.topper proposed to do there. Differential Revision: https://reviews.llvm.org/D158169 (cherry picked from commit fa1b6e6b34eb6382c451f3a06a7c52d7ac6ada1d) --- llvm/lib/Target/X86/X86CallingConv.td | 8 +++ llvm/test/CodeGen/X86/addcarry.ll | 2 +- llvm/test/CodeGen/X86/i128-abi.ll | 90 +++++++++++++++++++++++++++ llvm/test/CodeGen/X86/sadd_sat_vec.ll | 36 +++++------ llvm/test/CodeGen/X86/ssub_sat_vec.ll | 36 +++++------ llvm/test/CodeGen/X86/subcarry.ll | 2 +- llvm/test/CodeGen/X86/uadd_sat_vec.ll | 8 +-- llvm/test/CodeGen/X86/usub_sat_vec.ll | 8 +-- 8 files changed, 144 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/X86/i128-abi.ll diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 06cebdc215943..29caf79e3c743 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -532,6 +532,14 @@ def CC_X86_64_C : CallingConv<[ // The first 6 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, + + // i128 can be either passed in two i64 registers, or on the stack, but + // not split across register and stack. As such, do not allow using R9 + // for a split i64. 
+ CCIfType<[i64], + CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, + CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, // The first 8 MMX vector arguments are passed in XMM registers on Darwin. diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index af8f921ef9773..231645f641591 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -48,7 +48,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: add256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: addq %r9, %rsi +; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8 diff --git a/llvm/test/CodeGen/X86/i128-abi.ll b/llvm/test/CodeGen/X86/i128-abi.ll new file mode 100644 index 0000000000000..d1d6f86e08fb8 --- /dev/null +++ b/llvm/test/CodeGen/X86/i128-abi.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i128 @in_reg(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i128 %a4) { +; CHECK-LABEL: in_reg: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %r9, %rdx +; CHECK-NEXT: movq %r8, %rax +; CHECK-NEXT: retq + ret i128 %a4 +} + +define i128 @on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5) { +; CHECK-LABEL: on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: movq 8(%rsp), %rax +; CHECK-NEXT: movq 16(%rsp), %rdx +; CHECK-NEXT: retq + ret i128 %a5 +} + +define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) { +; CHECK-LABEL: trailing_arg_on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: movq 24(%rsp), %rax +; CHECK-NEXT: retq + ret i64 %a6 +} + +define void @call_in_reg(i128 %x) nounwind { +; CHECK-LABEL: call_in_reg: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rsi, %r9 +; CHECK-NEXT:
movq %rdi, %r8 +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: movl $3, %ecx +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq in_reg@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + call i128 @in_reg(i64 0, i64 1, i64 2, i64 3, i128 %x) + ret void +} + +define void @call_on_stack(i128 %x) nounwind { +; CHECK-LABEL: call_on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %r9 +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: movl $3, %ecx +; CHECK-NEXT: movl $4, %r8d +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: pushq %r9 +; CHECK-NEXT: callq on_stack@PLT +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + call i128 @on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x) + ret void +} + +define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind { +; CHECK-LABEL: call_trailing_arg_on_stack: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rsi, %r9 +; CHECK-NEXT: movq %rdi, %r10 +; CHECK-NEXT: subq $8, %rsp +; CHECK-NEXT: movl $1, %esi +; CHECK-NEXT: movl $2, %edx +; CHECK-NEXT: movl $3, %ecx +; CHECK-NEXT: movl $4, %r8d +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: pushq %r9 +; CHECK-NEXT: pushq %r10 +; CHECK-NEXT: callq trailing_arg_on_stack@PLT +; CHECK-NEXT: addq $32, %rsp +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y) + ret void +} diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll index 8d914ba81a096..45a8a6fd5449a 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: seto %dil -; 
SSE-NEXT: movq %r8, %r10 -; SSE-NEXT: sarq $63, %r10 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: sarq $63, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmovneq %r10, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; SSE-NEXT: xorq %r11, %r10 +; SSE-NEXT: cmovneq %r9, %rcx +; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; SSE-NEXT: xorq %r10, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmoveq %r8, %r10 -; SSE-NEXT: addq %r9, %rsi +; SSE-NEXT: cmoveq %r8, %r9 +; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: seto %dil ; SSE-NEXT: movq %rdx, %r8 ; SSE-NEXT: sarq $63, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmovneq %r8, %rsi -; SSE-NEXT: xorq %r11, %r8 +; SSE-NEXT: xorq %r10, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmoveq %rdx, %r8 ; SSE-NEXT: movq %rcx, 16(%rax) ; SSE-NEXT: movq %rsi, (%rax) -; SSE-NEXT: movq %r10, 24(%rax) +; SSE-NEXT: movq %r9, 24(%rax) ; SSE-NEXT: movq %r8, 8(%rax) ; SSE-NEXT: retq ; @@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: seto %dil -; AVX-NEXT: movq %r8, %r10 -; AVX-NEXT: sarq $63, %r10 +; AVX-NEXT: movq %r8, %r9 +; AVX-NEXT: sarq $63, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmovneq %r10, %rcx -; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; AVX-NEXT: xorq %r11, %r10 +; AVX-NEXT: cmovneq %r9, %rcx +; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; AVX-NEXT: xorq %r10, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmoveq %r8, %r10 -; AVX-NEXT: addq %r9, %rsi +; AVX-NEXT: cmoveq %r8, %r9 +; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: seto %dil ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: sarq $63, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmovneq %r8, %rsi -; AVX-NEXT: xorq %r11, %r8 +; 
AVX-NEXT: xorq %r10, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmoveq %rdx, %r8 ; AVX-NEXT: movq %rcx, 16(%rax) ; AVX-NEXT: movq %rsi, (%rax) -; AVX-NEXT: movq %r10, 24(%rax) +; AVX-NEXT: movq %r9, 24(%rax) ; AVX-NEXT: movq %r8, 8(%rax) ; AVX-NEXT: retq %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll index 14f1985c60ff6..d99d5aaa87536 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -2026,27 +2026,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: seto %dil -; SSE-NEXT: movq %r8, %r10 -; SSE-NEXT: sarq $63, %r10 +; SSE-NEXT: movq %r8, %r9 +; SSE-NEXT: sarq $63, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmovneq %r10, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; SSE-NEXT: xorq %r11, %r10 +; SSE-NEXT: cmovneq %r9, %rcx +; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; SSE-NEXT: xorq %r10, %r9 ; SSE-NEXT: testb %dil, %dil -; SSE-NEXT: cmoveq %r8, %r10 -; SSE-NEXT: subq %r9, %rsi +; SSE-NEXT: cmoveq %r8, %r9 +; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: seto %dil ; SSE-NEXT: movq %rdx, %r8 ; SSE-NEXT: sarq $63, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmovneq %r8, %rsi -; SSE-NEXT: xorq %r11, %r8 +; SSE-NEXT: xorq %r10, %r8 ; SSE-NEXT: testb %dil, %dil ; SSE-NEXT: cmoveq %rdx, %r8 ; SSE-NEXT: movq %rcx, 16(%rax) ; SSE-NEXT: movq %rsi, (%rax) -; SSE-NEXT: movq %r10, 24(%rax) +; SSE-NEXT: movq %r9, 24(%rax) ; SSE-NEXT: movq %r8, 8(%rax) ; SSE-NEXT: retq ; @@ -2056,27 +2056,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: seto %dil -; AVX-NEXT: movq %r8, %r10 -; 
AVX-NEXT: sarq $63, %r10 +; AVX-NEXT: movq %r8, %r9 +; AVX-NEXT: sarq $63, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmovneq %r10, %rcx -; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000 -; AVX-NEXT: xorq %r11, %r10 +; AVX-NEXT: cmovneq %r9, %rcx +; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000 +; AVX-NEXT: xorq %r10, %r9 ; AVX-NEXT: testb %dil, %dil -; AVX-NEXT: cmoveq %r8, %r10 -; AVX-NEXT: subq %r9, %rsi +; AVX-NEXT: cmoveq %r8, %r9 +; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: seto %dil ; AVX-NEXT: movq %rdx, %r8 ; AVX-NEXT: sarq $63, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmovneq %r8, %rsi -; AVX-NEXT: xorq %r11, %r8 +; AVX-NEXT: xorq %r10, %r8 ; AVX-NEXT: testb %dil, %dil ; AVX-NEXT: cmoveq %rdx, %r8 ; AVX-NEXT: movq %rcx, 16(%rax) ; AVX-NEXT: movq %rsi, (%rax) -; AVX-NEXT: movq %r10, 24(%rax) +; AVX-NEXT: movq %r9, 24(%rax) ; AVX-NEXT: movq %r8, 8(%rax) ; AVX-NEXT: retq %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll index 9538ea1061cd1..1e9db9f55a8d5 100644 --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -21,7 +21,7 @@ define i256 @sub256(i256 %a, i256 %b) nounwind { ; CHECK-LABEL: sub256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: subq %r9, %rsi +; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll index f97603ebea92b..50c73009314a9 100644 --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -1161,11 +1161,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE-LABEL: v2i128: ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax 
-; SSE-NEXT: addq %r9, %rsi +; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; SSE-NEXT: movq $-1, %rdi -; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: cmovbq %rdi, %rdx +; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: cmovbq %rdi, %r8 @@ -1179,11 +1179,11 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX-LABEL: v2i128: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: addq %r9, %rsi +; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx ; AVX-NEXT: movq $-1, %rdi -; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: cmovbq %rdi, %rdx +; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: cmovbq %rdi, %r8 diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll index a9cf02991d428..8823b98c4ff8a 100644 --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -1057,10 +1057,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: xorl %edi, %edi -; SSE-NEXT: subq %r9, %rsi +; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx -; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: cmovbq %rdi, %rdx +; SSE-NEXT: cmovbq %rdi, %rsi ; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; SSE-NEXT: cmovbq %rdi, %r8 @@ -1075,10 +1075,10 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: xorl %edi, %edi -; AVX-NEXT: subq %r9, %rsi +; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rsi ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx -; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: cmovbq %rdi, %rdx +; AVX-NEXT: cmovbq %rdi, %rsi ; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx ; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8 ; AVX-NEXT: cmovbq %rdi, %r8