diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 9830b521797c1..c675671988ed4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1123,15 +1123,13 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: { // FIXME: Support other types - unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + Type *FromTy = + getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg())); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64)) + if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy) return UnableToLegalize; LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, - ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - LocObserver); + MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver); if (Status != Legalized) return Status; break; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 42cd43c3afa37..fef0b722efe45 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -661,7 +661,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // Conversions getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) - .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) + .legalFor({{s32, s32}, + {s64, s32}, + {s32, s64}, + {s64, s64}, + {v2s64, v2s64}, + {v4s32, v4s32}, + {v2s32, v2s32}}) .legalIf([=](const LegalityQuery &Query) { return HasFP16 && (Query.Types[1] == s16 || Query.Types[1] == v4s16 || @@ -669,26 +675,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) (Query.Types[0] == s32 || Query.Types[0] == s64 || Query.Types[0] == v4s16 || Query.Types[0] == v8s16); }) - .widenScalarToNextPow2(0) - .clampScalar(0, s32, s64) - .widenScalarToNextPow2(1) - .clampScalarOrElt(1, MinFPScalar, s64) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) + .scalarizeIf(scalarOrEltWiderThan(1, 64), 1) + // The range of a fp16 value fits into an i17, so we can lower the width + // to i64. + .narrowScalarIf( + [=](const LegalityQuery &Query) { + return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64; + }, + changeTo(0, s64)) .moreElementsToNextPow2(0) + .widenScalarOrEltToNextPow2OrMinSize(0) + .minScalar(0, s32) + .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32) .widenScalarIf( [=](const LegalityQuery &Query) { - return Query.Types[0].getScalarSizeInBits() > - Query.Types[1].getScalarSizeInBits(); + return Query.Types[0].getScalarSizeInBits() <= 64 && + Query.Types[0].getScalarSizeInBits() > + Query.Types[1].getScalarSizeInBits(); }, LegalizeMutations::changeElementSizeTo(1, 0)) .widenScalarIf( [=](const LegalityQuery &Query) { - return Query.Types[0].getScalarSizeInBits() < - Query.Types[1].getScalarSizeInBits(); + return Query.Types[1].getScalarSizeInBits() <= 64 && + Query.Types[0].getScalarSizeInBits() < + Query.Types[1].getScalarSizeInBits(); }, LegalizeMutations::changeElementSizeTo(0, 1)) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v2s32, v4s32) - .clampMaxNumElements(0, s64, 2); + .clampMaxNumElements(0, s64, 2) + .libcallFor( + {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}}); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll deleted file mode 100644 index e5ca0d41fc549..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll +++ /dev/null @@ -1,8 +0,0 @@ -;RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -global-isel-abort=2 %s 2>&1 | FileCheck %s -; CHECK: fallback -; CHECK-LABEL: foo -define i16 @foo(ptr %p) { - %tmp0 = load fp128, ptr %p - %tmp1 = fptoui fp128 %tmp0 to i16 - ret i16 %tmp1 -} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir index a9afc61cb42a6..a3094225a031a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir @@ -711,79 +711,3 @@ body: | %1:fpr(<2 x s32>) = G_UITOFP %0 $d0 = COPY %1(<2 x s32>) ... - ---- -name: fptosi_v2s64_v2s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $d0 - - ; CHECK-LABEL: name: fptosi_v2s64_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]] - ; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[FCVTLv2i32_]] - ; CHECK: $q0 = COPY [[FCVTZSv2f64_]] - %0:fpr(<2 x s32>) = COPY $d0 - %1:fpr(<2 x s64>) = G_FPTOSI %0 - $q0 = COPY %1(<2 x s64>) -... - ---- -name: fptoui_v2s64_v2s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $d0 - - ; CHECK-LABEL: name: fptoui_v2s64_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]] - ; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[FCVTLv2i32_]] - ; CHECK: $q0 = COPY [[FCVTZUv2f64_]] - %0:fpr(<2 x s32>) = COPY $d0 - %1:fpr(<2 x s64>) = G_FPTOUI %0 - $q0 = COPY %1(<2 x s64>) -... - ---- -name: fptosi_v2s32_v2s64 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $q0 - - ; CHECK-LABEL: name: fptosi_v2s32_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[COPY]] - ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZSv2f64_]] - ; CHECK: $d0 = COPY [[XTNv2i32_]] - %0:fpr(<2 x s64>) = COPY $q0 - %1:fpr(<2 x s32>) = G_FPTOSI %0 - $d0 = COPY %1(<2 x s32>) -... - ---- -name: fptoui_v2s32_v2s64 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $q0 - - ; CHECK-LABEL: name: fptoui_v2s32_v2s64 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[COPY]] - ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZUv2f64_]] - ; CHECK: $d0 = COPY [[XTNv2i32_]] - %0:fpr(<2 x s64>) = COPY $q0 - %1:fpr(<2 x s32>) = G_FPTOUI %0 - $d0 = COPY %1(<2 x s32>) -... diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 3b8054a635bcd..4723ac01d6021 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1,55 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 - -; CHECK-GI: warning: Instruction selection used fallback path for fptos_f64_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f64_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f32_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f32_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f64_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f64_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f64_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f32_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f32_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i128 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define i64 @fptos_f64_i64(double %a) { ; CHECK-LABEL: fptos_f64_i64: @@ -558,56 +511,72 @@ entry: } define i64 @fptos_f128_i64(fp128 %a) { -; CHECK-LABEL: fptos_f128_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_f128_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f128_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: b __fixtfdi entry: %c = fptosi fp128 %a to i64 ret i64 %c } define i64 @fptou_f128_i64(fp128 %a) { -; CHECK-LABEL: fptou_f128_i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_f128_i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f128_i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: b __fixunstfdi entry: %c = fptoui fp128 %a to i64 ret i64 %c } define i32 @fptos_f128_i32(fp128 %a) { -; CHECK-LABEL: fptos_f128_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_f128_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_f128_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: b __fixtfsi entry: %c = fptosi fp128 %a to i32 ret i32 %c } define i32 @fptou_f128_i32(fp128 %a) { -; CHECK-LABEL: fptou_f128_i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_f128_i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f128_i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: b __fixunstfsi entry: %c = fptoui fp128 %a to i32 ret i32 %c @@ -628,14 +597,23 @@ entry: } define i16 @fptou_f128_i16(fp128 %a) { -; CHECK-LABEL: fptou_f128_i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_f128_i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f128_i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui fp128 %a to i16 ret i16 %c @@ -656,14 +634,23 @@ entry: } define i8 @fptou_f128_i8(fp128 %a) { -; CHECK-LABEL: fptou_f128_i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_f128_i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_f128_i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui fp128 %a to i8 ret i8 %c @@ -2290,152 +2277,278 @@ entry: } define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { -; CHECK-LABEL: fptos_v2f64_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f64_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov d0, v0.d[1] +; CHECK-SD-NEXT: bl __fixdfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: bl __fixdfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f64_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -24 +; CHECK-GI-NEXT: .cfi_offset b8, -32 +; CHECK-GI-NEXT: mov d8, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: bl __fixdfti +; CHECK-GI-NEXT: fmov d0, d8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixdfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x double> %a to <2 x i128> ret <2 x i128> %c } define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { -; CHECK-LABEL: fptou_v2f64_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f64_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov d0, v0.d[1] +; CHECK-SD-NEXT: bl __fixunsdfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: bl __fixunsdfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f64_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -24 +; CHECK-GI-NEXT: .cfi_offset b8, -32 +; CHECK-GI-NEXT: mov d8, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: bl __fixunsdfti +; CHECK-GI-NEXT: fmov d0, d8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunsdfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x double> %a to <2 x i128> ret <2 x i128> %c } define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) { -; CHECK-LABEL: fptos_v3f64_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: .cfi_offset b8, -56 -; CHECK-NEXT: .cfi_offset b9, -64 -; CHECK-NEXT: fmov d9, d0 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: fmov d8, d2 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: fmov d0, d8 -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: fmov d0, d9 -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f64_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: .cfi_offset b8, -56 +; CHECK-SD-NEXT: .cfi_offset b9, -64 +; CHECK-SD-NEXT: fmov d9, d0 +; CHECK-SD-NEXT: fmov d0, d1 +; CHECK-SD-NEXT: fmov d8, d2 +; CHECK-SD-NEXT: bl __fixdfti +; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixdfti +; CHECK-SD-NEXT: fmov d0, d9 +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: bl __fixdfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f64_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: .cfi_offset b8, -56 +; CHECK-GI-NEXT: .cfi_offset b9, -64 +; CHECK-GI-NEXT: fmov d8, d1 +; CHECK-GI-NEXT: fmov d9, d2 +; CHECK-GI-NEXT: bl __fixdfti +; CHECK-GI-NEXT: fmov d0, d8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixdfti +; CHECK-GI-NEXT: fmov d0, d9 +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixdfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x double> %a to <3 x i128> ret <3 x i128> %c } define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) { -; CHECK-LABEL: fptou_v3f64_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: .cfi_offset b8, -56 -; CHECK-NEXT: .cfi_offset b9, -64 -; CHECK-NEXT: fmov d9, d0 -; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: fmov d8, d2 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: fmov d0, d8 -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: fmov d0, d9 -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f64_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: .cfi_offset b8, -56 +; CHECK-SD-NEXT: .cfi_offset b9, -64 +; CHECK-SD-NEXT: fmov d9, d0 +; CHECK-SD-NEXT: fmov d0, d1 +; CHECK-SD-NEXT: fmov d8, d2 +; CHECK-SD-NEXT: bl __fixunsdfti +; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixunsdfti +; CHECK-SD-NEXT: fmov d0, d9 +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: bl __fixunsdfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f64_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: .cfi_offset b8, -56 +; CHECK-GI-NEXT: .cfi_offset b9, -64 +; CHECK-GI-NEXT: fmov d8, d1 +; CHECK-GI-NEXT: fmov d9, d2 +; CHECK-GI-NEXT: bl __fixunsdfti +; CHECK-GI-NEXT: fmov d0, d8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunsdfti +; CHECK-GI-NEXT: fmov d0, d9 +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixunsdfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x double> %a to <3 x i128> ret <3 x i128> %c @@ -3570,154 +3683,284 @@ entry: } define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { -; CHECK-LABEL: fptos_v2f32_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f32_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f32_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -24 +; CHECK-GI-NEXT: .cfi_offset b8, -32 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s8, v0.s[1] +; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-GI-NEXT: bl __fixsfti +; CHECK-GI-NEXT: fmov s0, s8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixsfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x float> %a to <2 x i128> ret <2 x i128> %c } define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { -; CHECK-LABEL: fptou_v2f32_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f32_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f32_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -24 +; CHECK-GI-NEXT: .cfi_offset b8, -32 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s8, v0.s[1] +; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-GI-NEXT: bl __fixunssfti +; CHECK-GI-NEXT: fmov s0, s8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunssfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x float> %a to <2 x i128> ret <2 x i128> %c } define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) { -; CHECK-LABEL: fptos_v3f32_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x21 -; CHECK-NEXT: mov x3, x22 -; CHECK-NEXT: mov x4, x19 -; CHECK-NEXT: mov x5, x20 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f32_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 +; CHECK-SD-NEXT: mov x4, x19 +; CHECK-SD-NEXT: mov x5, x20 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f32_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: .cfi_offset b8, -56 +; CHECK-GI-NEXT: .cfi_offset b9, -64 +; CHECK-GI-NEXT: mov s8, v0.s[1] +; CHECK-GI-NEXT: mov s9, v0.s[2] +; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-GI-NEXT: bl __fixsfti +; CHECK-GI-NEXT: fmov s0, s8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixsfti +; CHECK-GI-NEXT: fmov s0, s9 +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixsfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x float> %a to <3 x i128> ret <3 x i128> %c } define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) { -; CHECK-LABEL: fptou_v3f32_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x21 -; CHECK-NEXT: mov x3, x22 -; CHECK-NEXT: mov x4, x19 -; CHECK-NEXT: mov x5, x20 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f32_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: mov s0, v0.s[1] +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x21 +; CHECK-SD-NEXT: mov x3, x22 +; CHECK-SD-NEXT: mov x4, x19 +; CHECK-SD-NEXT: mov x5, x20 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f32_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp d9, d8, [sp, #-64]! // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: .cfi_offset b8, -56 +; CHECK-GI-NEXT: .cfi_offset b9, -64 +; CHECK-GI-NEXT: mov s8, v0.s[1] +; CHECK-GI-NEXT: mov s9, v0.s[2] +; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-GI-NEXT: bl __fixunssfti +; CHECK-GI-NEXT: fmov s0, s8 +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunssfti +; CHECK-GI-NEXT: fmov s0, s9 +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixunssfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x float> %a to <3 x i128> ret <3 x i128> %c @@ -3850,14 +4093,13 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) { ; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvtl v1.2d, v1.2s -; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-NOFP16-NEXT: fcvtzs v2.2d, v1.2d +; CHECK-GI-NOFP16-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v1.2d +; CHECK-GI-NOFP16-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i64: @@ -3915,14 +4157,13 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) { ; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[2] -; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvtl v1.2d, v1.2s -; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-NOFP16-NEXT: fcvtzu v2.2d, v1.2d +; CHECK-GI-NOFP16-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v1.2d +; CHECK-GI-NOFP16-NEXT: fcvtzu v2.2d, v2.2d +; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NOFP16-NEXT: mov d1, v0.d[1] ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NOFP16-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i64: @@ -6782,742 +7023,1320 @@ entry: } define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { -; CHECK-LABEL: fptos_v2f16_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f16_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x2, s1 +; CHECK-GI-NOFP16-NEXT: asr x1, x0, #63 +; CHECK-GI-NOFP16-NEXT: asr x3, x2, #63 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x2, h1 +; CHECK-GI-FP16-NEXT: asr x1, x0, #63 +; CHECK-GI-FP16-NEXT: asr x3, x2, #63 +; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <2 x half> %a to <2 x i128> ret <2 x i128> %c } define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { -; CHECK-LABEL: fptou_v2f16_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f16_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov x1, xzr +; CHECK-GI-NOFP16-NEXT: mov x3, xzr +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x2, s1 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: mov x1, xzr +; CHECK-GI-FP16-NEXT: mov x3, xzr +; CHECK-GI-FP16-NEXT: fcvtzu x2, h1 +; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <2 x half> %a to <2 x i128> ret <2 x i128> %c } define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) { -; CHECK-LABEL: fptos_v3f16_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -entry: - %c = fptosi <3 x half> %a to <3 x i128> - ret <3 x i128> %c -} - -define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) { -; CHECK-LABEL: fptou_v3f16_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixunshfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -entry: - %c = fptoui <3 x half> %a to <3 x i128> - ret <3 x i128> %c +; CHECK-SD-LABEL: fptos_v3f16_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x2, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzs x4, s2 +; CHECK-GI-NOFP16-NEXT: asr x1, x0, #63 +; CHECK-GI-NOFP16-NEXT: asr x3, x2, #63 +; CHECK-GI-NOFP16-NEXT: asr x5, x4, #63 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: fcvtzs x2, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x4, h2 +; CHECK-GI-FP16-NEXT: asr x1, x0, #63 +; CHECK-GI-FP16-NEXT: asr x3, x2, #63 +; CHECK-GI-FP16-NEXT: asr x5, x4, #63 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i128> + ret <3 x i128> %c +} + +define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) { +; CHECK-SD-LABEL: fptou_v3f16_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov x1, xzr +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: mov x3, xzr +; CHECK-GI-NOFP16-NEXT: mov x5, xzr +; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 +; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 +; CHECK-GI-NOFP16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NOFP16-NEXT: fcvtzu x2, s1 +; CHECK-GI-NOFP16-NEXT: fcvtzu x4, s2 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov x1, xzr +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: mov x3, xzr +; CHECK-GI-FP16-NEXT: mov x5, xzr +; CHECK-GI-FP16-NEXT: fcvtzu x2, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x4, h2 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i128> + ret <3 x i128> %c } define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { -; CHECK-LABEL: fptos_v2f128_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f128_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f128_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfdi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: bl __fixtfdi +; CHECK-GI-NEXT: fmov d0, x19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.d[1], x0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i64> ret <2 x i64> %c } define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { -; CHECK-LABEL: fptou_v2f128_v2i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f128_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f128_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfdi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: bl __fixunstfdi +; CHECK-GI-NEXT: fmov d0, x19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.d[1], x0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i64> ret <2 x i64> %c } define <3 x i64> @fptos_v3f128_v3i64(<3 x fp128> %a) { -; CHECK-LABEL: fptos_v3f128_v3i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str d8, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov d8, x0 -; CHECK-NEXT: bl __fixtfdi -; CHECK-NEXT: fmov d0, d8 -; CHECK-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f128_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov d8, x0 +; CHECK-SD-NEXT: bl __fixtfdi +; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f128_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfdi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: bl __fixtfdi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x20, x0 +; CHECK-GI-NEXT: bl __fixtfdi +; CHECK-GI-NEXT: fmov d0, x19 +; CHECK-GI-NEXT: fmov d1, x20 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov d2, x0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i64> ret <3 x i64> %c } define <3 x i64> @fptou_v3f128_v3i64(<3 x fp128> %a) { -; CHECK-LABEL: fptou_v3f128_v3i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str d8, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: fmov d8, x0 -; CHECK-NEXT: bl __fixunstfdi -; CHECK-NEXT: fmov d0, d8 -; CHECK-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f128_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov d8, x0 +; CHECK-SD-NEXT: bl __fixunstfdi +; CHECK-SD-NEXT: fmov d0, d8 +; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f128_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfdi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: bl __fixunstfdi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x20, x0 +; CHECK-GI-NEXT: bl __fixunstfdi +; CHECK-GI-NEXT: fmov d0, x19 +; CHECK-GI-NEXT: fmov d1, x20 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov d2, x0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i64> ret <3 x i64> %c } define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { -; CHECK-LABEL: fptos_v2f128_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f128_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f128_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i32> ret <2 x i32> %c } define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) { -; CHECK-LABEL: fptou_v2f128_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f128_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f128_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i32> ret <2 x i32> %c } define <3 x i32> @fptos_v3f128_v3i32(<3 x fp128> %a) { -; CHECK-LABEL: fptos_v3f128_v3i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[2], w0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f128_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[2], w0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f128_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[2], w0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i32> ret <3 x i32> %c } define <3 x i32> @fptou_v3f128_v3i32(<3 x fp128> %a) { -; CHECK-LABEL: fptou_v3f128_v3i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixunstfsi -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[2], w0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f128_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #64 +; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixunstfsi +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[2], w0 +; CHECK-SD-NEXT: add sp, sp, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f128_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[2], w0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i32> ret <3 x i32> %c } define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { -; CHECK-LABEL: fptos_v2f128_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f128_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f128_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i16> ret <2 x i16> %c } define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { -; CHECK-LABEL: fptou_v2f128_v2i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f128_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f128_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i16> ret <2 x i16> %c } define <3 x i16> @fptos_v3f128_v3i16(<3 x fp128> %a) { -; CHECK-LABEL: fptos_v3f128_v3i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s8, w0 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f128_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s8, w0 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f128_v3i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: fmov s1, w20 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i16> ret <3 x i16> %c } define <3 x i16> @fptou_v3f128_v3i16(<3 x fp128> %a) { -; CHECK-LABEL: fptou_v3f128_v3i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s8, w0 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f128_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s8, w0 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f128_v3i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: fmov s1, w20 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i16> ret <3 x i16> %c } define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { -; CHECK-LABEL: fptos_v2f128_v2i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f128_v2i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f128_v2i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i8> ret <2 x i8> %c } define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { -; CHECK-LABEL: fptou_v2f128_v2i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f128_v2i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f128_v2i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #32 +; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: add sp, sp, #32 +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i8> ret <2 x i8> %c } define <3 x i8> @fptos_v3f128_v3i8(<3 x fp128> %a) { -; CHECK-LABEL: fptos_v3f128_v3i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s8, w0 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: umov w0, v0.h[0] -; CHECK-NEXT: umov w1, v0.h[1] -; CHECK-NEXT: umov w2, v0.h[2] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f128_v3i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s8, w0 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: umov w0, v0.h[0] +; CHECK-SD-NEXT: umov w1, v0.h[1] +; CHECK-SD-NEXT: umov w2, v0.h[2] +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f128_v3i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixtfsi +; CHECK-GI-NEXT: mov w2, w0 +; CHECK-GI-NEXT: mov w0, w19 +; CHECK-GI-NEXT: mov w1, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i8> ret <3 x i8> %c } define <3 x i8> @fptou_v3f128_v3i8(<3 x fp128> %a) { -; CHECK-LABEL: fptou_v3f128_v3i8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset b8, -16 -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s8, w0 -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __fixtfsi -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.s[1], w0 -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: umov w0, v0.h[0] -; CHECK-NEXT: umov w1, v0.h[1] -; CHECK-NEXT: umov w2, v0.h[2] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f128_v3i8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w30, -8 +; CHECK-SD-NEXT: .cfi_offset b8, -16 +; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s8, w0 +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: bl __fixtfsi +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.s[1], w0 +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: umov w0, v0.h[0] +; CHECK-SD-NEXT: umov w1, v0.h[1] +; CHECK-SD-NEXT: umov w2, v0.h[2] +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f128_v3i8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #64 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __fixunstfsi +; CHECK-GI-NEXT: mov w2, w0 +; CHECK-GI-NEXT: mov w0, w19 +; CHECK-GI-NEXT: mov w1, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i8> ret <3 x i8> %c } define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) { -; CHECK-LABEL: fptos_v2f128_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v2f128_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixtfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixtfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v2f128_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfti +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixtfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #48 +; CHECK-GI-NEXT: ret entry: %c = fptosi <2 x fp128> %a to <2 x i128> ret <2 x i128> %c } define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) { -; CHECK-LABEL: fptou_v2f128_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v2f128_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #48 +; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w30, -32 +; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixunstfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixunstfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #48 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v2f128_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfti +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunstfti +; CHECK-GI-NEXT: mov x2, x0 +; CHECK-GI-NEXT: mov x3, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #48 +; CHECK-GI-NEXT: ret entry: %c = fptoui <2 x fp128> %a to <2 x i128> ret <2 x i128> %c } define <3 x i128> @fptos_v3f128_v3i128(<3 x fp128> %a) { -; CHECK-LABEL: fptos_v3f128_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: bl __fixtfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #80 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptos_v3f128_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #80 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixtfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixtfti +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: bl __fixtfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #80 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptos_v3f128_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #80 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixtfti +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixtfti +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixtfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #80 +; CHECK-GI-NEXT: ret entry: %c = fptosi <3 x fp128> %a to <3 x i128> ret <3 x i128> %c } define <3 x i128> @fptou_v3f128_v3i128(<3 x fp128> %a) { -; CHECK-LABEL: fptou_v3f128_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, x0 -; CHECK-NEXT: mov x22, x1 -; CHECK-NEXT: bl __fixunstfti -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: mov x2, x19 -; CHECK-NEXT: mov x3, x20 -; CHECK-NEXT: mov x4, x21 -; CHECK-NEXT: mov x5, x22 -; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: add sp, sp, #80 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptou_v3f128_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #80 +; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w30, -48 +; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v0.16b, v1.16b +; CHECK-SD-NEXT: bl __fixunstfti +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x20, x1 +; CHECK-SD-NEXT: bl __fixunstfti +; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov x21, x0 +; CHECK-SD-NEXT: mov x22, x1 +; CHECK-SD-NEXT: bl __fixunstfti +; CHECK-SD-NEXT: fmov d0, x0 +; CHECK-SD-NEXT: mov x2, x19 +; CHECK-SD-NEXT: mov x3, x20 +; CHECK-SD-NEXT: mov x4, x21 +; CHECK-SD-NEXT: mov x5, x22 +; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov v0.d[1], x1 +; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov x0, d0 +; CHECK-SD-NEXT: add sp, sp, #80 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptou_v3f128_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #80 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 +; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __fixunstfti +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x19, x0 +; CHECK-GI-NEXT: mov x20, x1 +; CHECK-GI-NEXT: bl __fixunstfti +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov x21, x0 +; CHECK-GI-NEXT: mov x22, x1 +; CHECK-GI-NEXT: bl __fixunstfti +; CHECK-GI-NEXT: mov x4, x0 +; CHECK-GI-NEXT: mov x5, x1 +; CHECK-GI-NEXT: mov x0, x19 +; CHECK-GI-NEXT: mov x1, x20 +; CHECK-GI-NEXT: mov x2, x21 +; CHECK-GI-NEXT: mov x3, x22 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #80 +; CHECK-GI-NEXT: ret entry: %c = fptoui <3 x fp128> %a to <3 x i128> ret <3 x i128> %c