diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8c0a046d3a7e9..d0badb292647b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11244,8 +11244,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering <ordering>` and optional ``syncscope("<target-scope>")`` argument. The ``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads produce :ref:`defined <memmodel>` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic loads. Note: if the alignment is not greater or equal to the size of the ``<value>`` type, the atomic operation is likely to @@ -11385,8 +11385,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering <ordering>` and optional ``syncscope("<target-scope>")`` argument. The ``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads produce :ref:`defined <memmodel>` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic stores. Note: if the alignment is not greater or equal to the size of the ``<value>`` type, the atomic operation is likely to diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 10efc889fcc7f..56f2c92311f24 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -65,6 +65,7 @@ Changes to the LLVM IR removed: * `mul` +* A `load atomic` may now be used with vector types. * Updated semantics of `llvm.type.checked.load.relative` to match that of `llvm.load.relative`. diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index d56216e8b637d..43da39bdc44cd 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1904,6 +1904,20 @@ def atomic_load_64 : let MemoryVT = i64; } +def atomic_load_128_v2i64 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v2i64; +} + +def atomic_load_128_v4i32 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v4i32; +} + def atomic_load_nonext_8 : PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> { let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
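For illustration, here is a minimal IR sketch of what the LangRef and Verifier changes in this patch permit; the function name, value types, orderings, and alignments are illustrative choices, not taken from the patch. Per the Verifier change below, the vector's element type must itself be an integer, pointer, or floating-point type, and the overall bit width must still be a power of two no smaller than eight and no larger than the target-specific limit.

    ; Hypothetical example of the newly verifier-legal IR (not part of the patch):
    define <2 x i32> @vec_atomic_example(ptr %p, <2 x i32> %v) {
      ; a 64-bit (power-of-two sized) vector atomic load and store
      %old = load atomic <2 x i32>, ptr %p acquire, align 8
      store atomic <2 x i32> %v, ptr %p release, align 8
      ret <2 x i32> %old
    }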
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; - if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); - else { + if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); + auto *VTy = dyn_cast<VectorType>(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { + unsigned AS = PtrTy->getAddressSpace(); + Value *BC = Builder.CreateBitCast( + Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); + V = Builder.CreateIntToPtr(BC, I->getType()); + } else + V = Builder.CreateBitOrPointerCast(Result, I->getType()); + } else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index dd9af47da5287..b6e018ba0e454 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); @@ -1067,6 +1068,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4d844f0036a75..42763aab5bb55 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); @@ -4607,6 +4622,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -5988,6 +6006,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { + unsigned NumElts = + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional<EVT> findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must match scalability"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. + std::optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) + return SDValue(); + + SmallVector<EVT, 8> MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -7879,29 +7965,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction.
- if (MemVTs.empty()) { - assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); - if (!FirstVT->isVector()) { - unsigned NumElts = - WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); - return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); - } - if (FirstVT == WidenVT) - return LdOp; - - // TODO: We don't currently have any tests that exercise this code path. - assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); - unsigned NumConcat = - WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); - SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(*FirstVT); - ConcatOps[0] = LdOp; - for (unsigned i = 1; i != NumConcat; ++i) - ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); - } + if (MemVTs.empty()) + return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl, + DAG); // Load vector by using multiple loads from largest vector to scalar. SmallVector<SDValue, 16> LdOps; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index fdb4ddaafbbcc..68d42cfadd169 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4323,9 +4323,10 @@ void Verifier::visitLoadInst(LoadInst &LI) { Check(LI.getOrdering() != AtomicOrdering::Release && LI.getOrdering() != AtomicOrdering::AcquireRelease, "Load cannot have Release ordering", &LI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic load operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic load operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { @@ -4349,9 +4350,10 @@ void Verifier::visitStoreInst(StoreInst &SI) { Check(SI.getOrdering() != AtomicOrdering::Acquire && SI.getOrdering() != AtomicOrdering::AcquireRelease, "Store cannot have Acquire ordering", &SI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic store operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), - "atomic store operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b1a3e3c006bb3..3debf30da0a29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, @@ -32066,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } +TargetLowering::AtomicExpansionKind +X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->getScalarType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} +
LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 5cb6b3e493a32..43cddb2b53bd6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1839,6 +1839,8 @@ namespace llvm { shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; + TargetLoweringBase::AtomicExpansionKind + shouldCastAtomicLoadInIR(LoadInst *LI) const override; void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 927b2c8b22f05..3143015b7ec66 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1204,6 +1204,18 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))), + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + +def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <2 x i64> +def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <4 x i32> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll index 39f33f9fdcacb..6609edc2953cc 100644 --- a/llvm/test/Assembler/atomic.ll +++ b/llvm/test/Assembler/atomic.ll @@ -52,6 +52,25 @@ define void @f(ptr %x) { ; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic + ; CHECK: load atomic <1 x i32>, ptr %x unordered, align 4 + load atomic <1 x i32>, ptr %x unordered, align 4 + ; CHECK: store atomic <1 x i32> splat (i32 3), ptr %x release, align 4 + store atomic <1 x i32> splat (i32 3), ptr %x release, align 4 + ; CHECK: load atomic <2 x i32>, ptr %x unordered, align 4 + load atomic <2 x i32>, ptr %x unordered, align 4 + ; CHECK: store atomic <2 x i32> <i32 1, i32 2>, ptr %x release, align 4 + store atomic <2 x i32> <i32 1, i32 2>, ptr %x release, align 4 + + ; CHECK: load atomic <2 x ptr>, ptr %x unordered, align 4 + load atomic <2 x ptr>, ptr %x unordered, align 4 + ; CHECK: store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4 + store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4 + + ; CHECK: load atomic <2 x float>, ptr %x unordered, align 4 + load atomic <2 x float>, ptr %x unordered, align 4 + ; CHECK: store atomic <2 x float> <float 1.000000e+00, float 2.000000e+00>, ptr %x release, align 4 + store atomic <2 x float> <float 1.000000e+00, float 2.000000e+00>, ptr %x release, align 4 + ; CHECK: fence syncscope("singlethread") release fence syncscope("singlethread") release ; CHECK: fence seq_cst diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldr r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldr r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r1, #2 +; ARMV4-NEXT: bl __atomic_load_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldr r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldr r0, [r0] +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 45277ce3d26c4..9665d4173e23d 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by
utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3 ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -34,6 +34,1331 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-O0: {{.*}} -; CHECK-O3: {{.*}} + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzbl (%rdi), %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movb (%rdi), %al +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movb (%rdi), %al +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i16: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i16: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i16: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %ax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i16: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %ax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzbl (%rdi), %eax +; CHECK-O3-NEXT: movzbl %al, %eax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movzbl %al, %eax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax +; CHECK-AVX-O3-NEXT: movzbl %al, %eax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movb (%rdi), %al +; CHECK-SSE-O0-NEXT: movzbl %al, %eax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movb (%rdi), %al +; CHECK-AVX-O0-NEXT: movzbl %al, %eax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + %zret = zext <1 x i8> %ret to <1 x i32> + ret <1 x i32> %zret +} + +define <1 x i64> @atomic_vec1_i16_sext(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: movswq %ax, %rax +; CHECK-O3-NEXT: retq +; +; 
CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movswq %ax, %rax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: movswq %ax, %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: movswq %ax, %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %ax +; CHECK-SSE-O0-NEXT: movswq %ax, %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %ax +; CHECK-AVX-O0-NEXT: movswq %ax, %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + %sret = sext <1 x i16> %ret to <1 x i64> + ret <1 x i64> %sret +} + +define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) { +; CHECK-LABEL: atomic_vec1_ptr270: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq + %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4 + ret <1 x ptr addrspace(270)> %ret +} + +define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: movd %eax, %xmm0 +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %cx +; CHECK-O0-NEXT: # implicit-def: $eax +; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: movd %eax, %xmm0 +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %cx +; CHECK-SSE-O0-NEXT: # implicit-def: $eax +; CHECK-SSE-O0-NEXT: movw %cx, %ax +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %cx +; CHECK-AVX-O0-NEXT: # implicit-def: $eax +; CHECK-AVX-O0-NEXT: movw %cx, %ax +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2 + ret <1 x bfloat> %ret +} + +define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_ptr_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 8 + ret <1 x ptr> %ret +} + +define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_i64_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 8 + ret <1 x i64> %ret +} + +define <2 x i8> @atomic_vec2_i8(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: movd %eax, %xmm0 +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O3-NEXT: 
retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %cx +; CHECK-O0-NEXT: # implicit-def: $eax +; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: movd %eax, %xmm0 +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %cx +; CHECK-SSE-O0-NEXT: # implicit-def: $eax +; CHECK-SSE-O0-NEXT: movw %cx, %ax +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %cx +; CHECK-AVX-O0-NEXT: # implicit-def: $eax +; CHECK-AVX-O0-NEXT: movw %cx, %ax +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x i8>, ptr %x acquire, align 4 + ret <2 x i8> %ret +} + +define <2 x i16> @atomic_vec2_i16(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_i16: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_i16: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_i16: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_i16: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x i16>, ptr %x acquire, align 4 + ret <2 x i16> %ret +} + +define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { +; CHECK-LABEL: atomic_vec2_ptr270: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 + ret <2 x ptr addrspace(270)> %ret +} + +define <2 x i32> @atomic_vec2_i32_align(ptr %x) { +; CHECK-LABEL: atomic_vec2_i32_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 8 + ret <2 x i32> %ret +} + +define <2 x float> @atomic_vec2_float_align(ptr %x) { +; CHECK-LABEL: atomic_vec2_float_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <2 x float>, ptr %x acquire, align 8 + ret <2 x float> %ret +} + +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_half: +; 
CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + +define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_ptr: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movq (%rsp), %rax +; CHECK-O3-NEXT: popq %rcx +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_ptr: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movq (%rsp), %rax +; CHECK-SSE-O3-NEXT: popq %rcx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_ptr: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: movq (%rsp), %rax +; CHECK-AVX-O3-NEXT: popq %rcx +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_ptr: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq (%rsp), %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_ptr: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq (%rsp), %rax +; CHECK-SSE-O0-NEXT: popq %rcx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_ptr: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: 
movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: movq (%rsp), %rax +; CHECK-AVX-O0-NEXT: popq %rcx +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: movd %eax, %xmm0 +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %cx +; CHECK-O0-NEXT: # implicit-def: $eax +; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: movd %eax, %xmm0 +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movw (%rdi), %cx +; CHECK-SSE-O0-NEXT: # implicit-def: $eax +; CHECK-SSE-O0-NEXT: movw %cx, %ax +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movw (%rdi), %cx +; CHECK-AVX-O0-NEXT: # implicit-def: $eax +; CHECK-AVX-O0-NEXT: movw %cx, %ax +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-O3-LABEL: atomic_vec1_float: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_float: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_float: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_float: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_float: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_float: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_double_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_double_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_double_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_double_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_double_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: 
movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_double_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movq (%rsp), %rax +; CHECK-O3-NEXT: popq %rcx +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_i64: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movq (%rsp), %rax +; CHECK-SSE-O3-NEXT: popq %rcx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_i64: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: movq (%rsp), %rax +; CHECK-AVX-O3-NEXT: popq %rcx +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq (%rsp), %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_i64: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq (%rsp), %rax +; CHECK-SSE-O0-NEXT: popq %rcx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_i64: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: movq (%rsp), %rax +; CHECK-AVX-O0-NEXT: popq %rcx +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec1_double: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec1_double: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: popq %rax +; 
CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec1_double: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: popq %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec1_double: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec1_double: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: popq %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec1_double: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: popq %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec2_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $8, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_i32: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rax +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $8, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O3-NEXT: popq %rax +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_i32: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: pushq %rax +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $8, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O3-NEXT: popq %rax +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $8, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_i32: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rax +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $8, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; 
CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; CHECK-SSE-O0-NEXT: popq %rax +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_i32: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: pushq %rax +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $8, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; CHECK-AVX-O0-NEXT: popq %rax +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +; Move td records to AtomicExpand +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movl $2, %esi +; CHECK-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-O3-NEXT: movq %rdx, %xmm1 +; CHECK-O3-NEXT: movq %rax, %xmm0 +; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movl $2, %esi +; CHECK-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-O0-NEXT: movq %rdx, %xmm1 +; CHECK-O0-NEXT: movq %rax, %xmm0 +; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + +define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_i8: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_i8: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_i8: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_i8: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x i8>, ptr %x acquire, align 4 + ret <4 x i8> %ret +} + +define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x i16>, ptr %x acquire, align 8 + ret <4 x i16> %ret +} + +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-O3-LABEL: 
atomic_vec4_ptr270: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movl $2, %esi +; CHECK-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-O3-NEXT: movq %rdx, %xmm1 +; CHECK-O3-NEXT: movq %rax, %xmm0 +; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_ptr270: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_ptr270: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movl $2, %esi +; CHECK-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-O0-NEXT: movq %rdx, %xmm1 +; CHECK-O0-NEXT: movq %rax, %xmm0 +; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_ptr270: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16 + ret <4 x ptr addrspace(270)> %ret +} + +define <4 x half> @atomic_vec4_half(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_half: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x half>, ptr %x acquire, align 8 + ret <4 x half> %ret +} + +define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_bfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8 + ret <4 x bfloat> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_float_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movl $2, %esi +; CHECK-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-O3-NEXT: movq %rdx, %xmm1 +; CHECK-O3-NEXT: movq %rax, %xmm0 +; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rbx +; CHECK-SSE-O3-NEXT: xorl %eax, %eax +; CHECK-SSE-O3-NEXT: xorl %edx, %edx +; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx +; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx +; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O3-NEXT: popq %rbx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_float_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movl $2, %esi +; CHECK-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-O0-NEXT: movq %rdx, %xmm1 +; CHECK-O0-NEXT: movq %rax, %xmm0 +; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rbx +; CHECK-SSE-O0-NEXT: xorl %eax, %eax +; CHECK-SSE-O0-NEXT: movl %eax, %ebx +; CHECK-SSE-O0-NEXT: movq %rbx, %rax +; CHECK-SSE-O0-NEXT: movq 
%rbx, %rdx +; CHECK-SSE-O0-NEXT: movq %rbx, %rcx +; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O0-NEXT: popq %rbx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x float>, ptr %x acquire, align 16 + ret <4 x float> %ret +} + +define <4 x float> @atomic_vec4_float(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_float: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $24, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $16, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: addq $24, %rsp +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_float: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: subq $24, %rsp +; CHECK-SSE-O3-NEXT: movq %rdi, %rsi +; CHECK-SSE-O3-NEXT: movq %rsp, %rdx +; CHECK-SSE-O3-NEXT: movl $16, %edi +; CHECK-SSE-O3-NEXT: movl $2, %ecx +; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O3-NEXT: addq $24, %rsp +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_float: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: subq $24, %rsp +; CHECK-AVX-O3-NEXT: movq %rdi, %rsi +; CHECK-AVX-O3-NEXT: movq %rsp, %rdx +; CHECK-AVX-O3-NEXT: movl $16, %edi +; CHECK-AVX-O3-NEXT: movl $2, %ecx +; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O3-NEXT: vmovaps (%rsp), %xmm0 +; CHECK-AVX-O3-NEXT: addq $24, %rsp +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_float: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: subq $24, %rsp +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movl $16, %edi +; CHECK-O0-NEXT: movq %rsp, %rdx +; CHECK-O0-NEXT: movl $2, %ecx +; CHECK-O0-NEXT: callq __atomic_load@PLT +; CHECK-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-O0-NEXT: addq $24, %rsp +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_float: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: subq $24, %rsp +; CHECK-SSE-O0-NEXT: movq %rdi, %rsi +; CHECK-SSE-O0-NEXT: movl $16, %edi +; CHECK-SSE-O0-NEXT: movq %rsp, %rdx +; CHECK-SSE-O0-NEXT: movl $2, %ecx +; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT +; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0 +; CHECK-SSE-O0-NEXT: addq $24, %rsp +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_float: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: subq $24, %rsp +; CHECK-AVX-O0-NEXT: movq %rdi, %rsi +; CHECK-AVX-O0-NEXT: movl $16, %edi +; CHECK-AVX-O0-NEXT: movq %rsp, %rdx +; CHECK-AVX-O0-NEXT: movl $2, %ecx +; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT +; CHECK-AVX-O0-NEXT: vmovaps (%rsp), %xmm0 +; CHECK-AVX-O0-NEXT: addq $24, %rsp +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x float>, ptr %x acquire, align 4 + ret <4 x float> %ret +} + +define <8 x double> @atomic_vec8_double(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec8_double: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: subq $72, %rsp +; CHECK-O3-NEXT: movq %rdi, %rsi +; CHECK-O3-NEXT: movq %rsp, %rdx +; CHECK-O3-NEXT: movl $64, %edi +; CHECK-O3-NEXT: movl $2, %ecx +; CHECK-O3-NEXT: callq __atomic_load@PLT +; CHECK-O3-NEXT: movaps (%rsp), %xmm0 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2 +; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), 
%xmm3
+; CHECK-O3-NEXT: addq $72, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec8_double:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $72, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $64, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O3-NEXT: addq $72, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec8_double:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $72, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $64, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movapd (%rsp), %xmm0
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O0-NEXT: addq $72, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec8_double:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $72, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $64, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movapd (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O0-NEXT: addq $72, %rsp
+; CHECK-SSE-O0-NEXT: retq
+  %ret = load atomic <8 x double>, ptr %x acquire, align 4
+  ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $40, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $32, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT: addq $40, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $40, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $32, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT: addq $40, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: subq $40, %rsp
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $32, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: vmovups (%rsp), %ymm0
+; CHECK-AVX-O3-NEXT: addq $40, %rsp
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $40, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $32, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT: addq $40, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $40, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $32, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT: addq $40, %rsp
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: subq $40, %rsp
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $32, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: vmovups (%rsp), %ymm0
+; CHECK-AVX-O0-NEXT: addq $40, %rsp
+; CHECK-AVX-O0-NEXT: retq
+  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+  ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec32_half:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $72, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $64, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O3-NEXT: addq $72, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec32_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $72, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $64, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O3-NEXT: addq $72, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec32_half:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $72, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $64, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O0-NEXT: addq $72, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec32_half:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $72, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $64, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O0-NEXT: addq $72, %rsp
+; CHECK-SSE-O0-NEXT: retq
+  %ret = load atomic <32 x half>, ptr %x acquire, align 4
+  ret <32 x half> %ret
+}
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 5929c153d5961..f5c8baa3e931e 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,153 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
 
 ; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this 
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
 ; functionality, please move this test to a target which still is.
 
 define float @float_load_expand(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand
-; CHECK: %1 = load atomic i32, ptr %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
   %res = load atomic float, ptr %ptr unordered, align 4
   ret float %res
 }
 
 define float @float_load_expand_seq_cst(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand_seq_cst
-; CHECK: %1 = load atomic i32, ptr %ptr seq_cst, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_seq_cst(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
   %res = load atomic float, ptr %ptr seq_cst, align 4
   ret float %res
 }
 
 define float @float_load_expand_vol(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand_vol
-; CHECK: %1 = load atomic volatile i32, ptr %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_vol(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
   %res = load atomic volatile float, ptr %ptr unordered, align 4
   ret float %res
 }
 
 define float @float_load_expand_addr1(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: @float_load_expand_addr1
-; CHECK: %1 = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_addr1(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
   %res = load atomic float, ptr addrspace(1) %ptr unordered, align 4
   ret float %res
 }
 
 define void @float_store_expand(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
   store atomic float %v, ptr %ptr unordered, align 4
   ret void
 }
 
 define void @float_store_expand_seq_cst(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_seq_cst
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr %ptr seq_cst, align 4
+; CHECK-LABEL: define void @float_store_expand_seq_cst(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
   store atomic float %v, ptr %ptr seq_cst, align 4
   ret void
 }
 
 define void @float_store_expand_vol(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_vol
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic volatile i32 %1, ptr %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand_vol(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic volatile i32 [[TMP1]], ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
   store atomic volatile float %v, ptr %ptr unordered, align 4
   ret void
 }
 
 define void @float_store_expand_addr1(ptr addrspace(1) %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_addr1
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr addrspace(1) %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand_addr1(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
   store atomic float %v, ptr addrspace(1) %ptr unordered, align 4
   ret void
 }
 
 define void @pointer_cmpxchg_expand(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst monotonic
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst monotonic, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
   cmpxchg ptr %ptr, ptr null, ptr %v seq_cst monotonic
   ret void
 }
 
 define void @pointer_cmpxchg_expand2(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand2
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 release monotonic
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand2(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] release monotonic, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
   cmpxchg ptr %ptr, ptr null, ptr %v release monotonic
   ret void
 }
 
 define void @pointer_cmpxchg_expand3(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand3
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand3(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
   cmpxchg ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
 define void @pointer_cmpxchg_expand4(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand4
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg weak ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand4(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
   cmpxchg weak ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
 define void @pointer_cmpxchg_expand5(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand5
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg volatile ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand5(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
   cmpxchg volatile ptr %ptr, ptr null, ptr %v seq_cst seq_cst
   ret void
 }
 
-define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
-                                     ptr addrspace(2) %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand6
-; CHECK: %1 = ptrtoint ptr addrspace(2) %v to i64
-; CHECK: %2 = cmpxchg ptr addrspace(1) %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr addrspace(2)
-; CHECK: %6 = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) %5, 0
-; CHECK: %7 = insertvalue { ptr addrspace(2), i1 } %6, i1 %4, 1
+define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
+; CHECK-LABEL: define void @pointer_cmpxchg_expand6(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
+                                     ptr addrspace(2) %v) {
   cmpxchg ptr addrspace(1) %ptr, ptr addrspace(2) null, ptr addrspace(2) %v seq_cst seq_cst
   ret void
 }
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
+; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
+;
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
+; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
+;
+  %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+  ret <4 x ptr addrspace(270)> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
+; CHECK-NEXT: ret <2 x i16> [[RET]]
+;
+  %ret = load atomic <2 x i16>, ptr %x acquire, align 8
+  ret <2 x i16> %ret
+}
+
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
+; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT: ret <2 x half> [[RET]]
+;
+  %ret = load atomic <2 x half>, ptr %x acquire, align 8
+  ret <2 x half> %ret
+}
+
+define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+  %ret = load atomic <4 x i32>, ptr %x acquire, align 16
+  ret <4 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+;
+  %ret = load atomic <4 x float>, ptr %x acquire, align 16
+  ret <4 x float> %ret
+}
diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll
index f835b98b24345..17bf5a0528d73 100644
--- a/llvm/test/Verifier/atomics.ll
+++ b/llvm/test/Verifier/atomics.ll
@@ -1,14 +1,15 @@
 ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+; CHECK: atomic store operand must have integer, pointer, floating point, or vector type!
+; CHECK: atomic load operand must have integer, pointer, floating point, or vector type!
 
-; CHECK: atomic store operand must have integer, pointer, or floating point type!
-; CHECK: atomic load operand must have integer, pointer, or floating point type!
+%ty = type { i32 };
 
-define void @foo(ptr %P, <1 x i64> %v) {
-  store atomic <1 x i64> %v, ptr %P unordered, align 8
+define void @foo(ptr %P, %ty %v) {
+  store atomic %ty %v, ptr %P unordered, align 8
   ret void
 }
 
-define <1 x i64> @bar(ptr %P) {
-  %v = load atomic <1 x i64>, ptr %P unordered, align 8
-  ret <1 x i64> %v
+define %ty @bar(ptr %P) {
+  %v = load atomic %ty, ptr %P unordered, align 8
+  ret %ty %v
 }
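
A note on the expansion shape exercised by the AtomicExpand tests above: when a vector atomic load is lowered to the sized libcall, the call returns a plain integer, so the vector value has to be rebuilt from the scalar result. For pointer-element vectors this cannot be a single cast, since a direct integer-to-pointer bitcast is not legal IR; the result is first bitcast to an integer vector and then converted with inttoptr. The following is an illustrative sketch only (the function name @sketch_vec2_ptr_expansion is invented); its body mirrors the @atomic_vec2_ptr_align CHECK lines above:

; Illustrative sketch, not part of the patch.
define <2 x ptr> @sketch_vec2_ptr_expansion(ptr %x) {
  ; Sized libcall: 16-byte atomic load; i32 2 is the acquire memory order
  ; in the __atomic_* libcall ABI, matching the `acquire` ordering on the
  ; original IR-level loads.
  %raw = call i128 @__atomic_load_16(ptr %x, i32 2)
  ; Reinterpret the scalar result as an integer vector ...
  %ints = bitcast i128 %raw to <2 x i64>
  ; ... then convert element-wise back to pointers.
  %ptrs = inttoptr <2 x i64> %ints to <2 x ptr>
  ret <2 x ptr> %ptrs
}

declare i128 @__atomic_load_16(ptr, i32)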