diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index ff1d3497b77c3..798f112ce7200 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -3048,6 +3048,8 @@ class FunctionDecl : public DeclaratorDecl, static FunctionDecl *castFromDeclContext(const DeclContext *DC) { return static_cast(const_cast(DC)); } + + bool isReferenceableKernel() const; }; /// Represents a member of a struct/union/class. diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index 386693cabb1fb..df11a79a56b3b 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -70,15 +70,15 @@ class GlobalDecl { GlobalDecl(const VarDecl *D) { Init(D);} GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0) : MultiVersionIndex(MVIndex) { - if (!D->hasAttr()) { - Init(D); + if (D->isReferenceableKernel()) { + Value.setPointerAndInt(D, unsigned(getDefaultKernelReference(D))); return; } - Value.setPointerAndInt(D, unsigned(getDefaultKernelReference(D))); + Init(D); } GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind) : Value(D, unsigned(Kind)) { - assert(D->hasAttr() && "Decl is not a GPU kernel!"); + assert(D->isReferenceableKernel() && "Decl is not a GPU kernel!"); } GlobalDecl(const NamedDecl *D) { Init(D); } GlobalDecl(const BlockDecl *D) { Init(D); } @@ -131,12 +131,13 @@ class GlobalDecl { KernelReferenceKind getKernelReferenceKind() const { assert(((isa(getDecl()) && - cast(getDecl())->hasAttr()) || + cast(getDecl())->isReferenceableKernel()) || (isa(getDecl()) && cast(getDecl()) ->getTemplatedDecl() ->hasAttr())) && "Decl is not a GPU kernel!"); + return static_cast(Value.getInt()); } @@ -160,8 +161,9 @@ class GlobalDecl { } static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) { - return D->getLangOpts().CUDAIsDevice ? KernelReferenceKind::Kernel - : KernelReferenceKind::Stub; + return (D->hasAttr() || D->getLangOpts().CUDAIsDevice) + ? KernelReferenceKind::Kernel + : KernelReferenceKind::Stub; } GlobalDecl getWithDecl(const Decl *D) { @@ -197,7 +199,7 @@ class GlobalDecl { GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) { assert(isa(getDecl()) && - cast(getDecl())->hasAttr() && + cast(getDecl())->isReferenceableKernel() && "Decl is not a GPU kernel!"); GlobalDecl Result(*this); Result.Value.setInt(unsigned(Kind)); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 0e4d69392e8c7..83116ecc0f47b 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -5468,6 +5468,10 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) { /*TrailingRequiresClause=*/{}); } +bool FunctionDecl::isReferenceableKernel() const { + return hasAttr() || hasAttr(); +} + BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { return new (C, DC) BlockDecl(DC, L); } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index e8e5f2fa0cc12..4deed08d693ac 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -695,9 +695,9 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, GD = GlobalDecl(CD, Ctor_Base); else if (const CXXDestructorDecl *DD = dyn_cast(ND)) GD = GlobalDecl(DD, Dtor_Base); - else if (ND->hasAttr()) - GD = GlobalDecl(cast(ND)); - else + else if (auto FD = dyn_cast(ND)) { + GD = FD->isReferenceableKernel() ? GlobalDecl(FD) : GlobalDecl(ND); + } else GD = GlobalDecl(ND); MC->mangleName(GD, Out); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index fdd84d0bf7c5c..eb25b19bbdc74 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -526,6 +526,7 @@ class CXXNameMangler { void mangleSourceName(const IdentifierInfo *II); void mangleRegCallName(const IdentifierInfo *II); void mangleDeviceStubName(const IdentifierInfo *II); + void mangleOCLDeviceStubName(const IdentifierInfo *II); void mangleSourceNameWithAbiTags( const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr); void mangleLocalName(GlobalDecl GD, @@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName( bool IsDeviceStub = FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; + bool IsOCLDeviceStub = + FD && FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleDeviceStubName(II); + else if (IsOCLDeviceStub) + mangleOCLDeviceStubName(II); else if (IsRegCall) mangleRegCallName(II); else @@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { << II->getName(); } +void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) { + // ::= __clang_ocl_kern_imp_ + // ::= [n] + // ::= + StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_"; + Out << II->getLength() + OCLDeviceStubNamePrefix.size() + << OCLDeviceStubNamePrefix << II->getName(); +} + void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // ::= // ::= [n] diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index b44ab23f1d0e1..141957c1cdce0 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -540,9 +540,9 @@ class ASTNameGenerator::Implementation { GD = GlobalDecl(CtorD, Ctor_Complete); else if (const auto *DtorD = dyn_cast(D)) GD = GlobalDecl(DtorD, Dtor_Complete); - else if (D->hasAttr()) - GD = GlobalDecl(cast(D)); - else + else if (const FunctionDecl *FD = dyn_cast(D)) { + GD = FD->isReferenceableKernel() ? GlobalDecl(FD) : GlobalDecl(D); + } else GD = GlobalDecl(D); MC->mangleName(GD, OS); return false; diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 7e964124a9fec..4d14614fc1ec7 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1162,9 +1162,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, ->getTemplatedDecl() ->hasAttr())) && GD.getKernelReferenceKind() == KernelReferenceKind::Stub; + bool IsOCLDeviceStub = + ND && isa(ND) && ND->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub; if (IsDeviceStub) mangleSourceName( (llvm::Twine("__device_stub__") + II->getName()).str()); + else if (IsOCLDeviceStub) + mangleSourceName( + (llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str()); else mangleSourceName(II->getName()); break; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b202255c3a15b..b25cdf9523ae1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -499,7 +499,8 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, /// Arrange the argument and result information for the declaration or /// definition of the given function. const CGFunctionInfo & -CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { +CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) { + const FunctionDecl *FD = cast(GD.getDecl()); if (const CXXMethodDecl *MD = dyn_cast(FD)) if (MD->isImplicitObjectMemberFunction()) return arrangeCXXMethodDeclaration(MD); @@ -509,6 +510,13 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { assert(isa(FTy)); setCUDAKernelCallingConvention(FTy, CGM, FD); + if (FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + const FunctionType *FT = FTy->getAs(); + CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); + FTy = FT->getCanonicalTypeUnqualified(); + } + // When declaring a function without a prototype, always use a // non-variadic type. if (CanQual noProto = FTy.getAs()) { @@ -581,13 +589,11 @@ CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, const CGFunctionInfo & CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { // FIXME: Do we need to handle ObjCMethodDecl? - const FunctionDecl *FD = cast(GD.getDecl()); - if (isa(GD.getDecl()) || isa(GD.getDecl())) return arrangeCXXStructorDeclaration(GD); - return arrangeFunctionDeclaration(FD); + return arrangeFunctionDeclaration(GD); } /// Arrange a thunk that takes 'this' as the first parameter followed by @@ -2391,7 +2397,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // Collect function IR attributes from the callee prototype if we have one. AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); - const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); // Attach assumption attributes to the declaration. If this is a call @@ -2498,7 +2503,11 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, NumElemsParam); } - if (TargetDecl->hasAttr()) { + if (TargetDecl->hasAttr() && + CallingConv != CallingConv::CC_C && + CallingConv != CallingConv::CC_SpirFunction) { + // Check CallingConv to avoid adding uniform-work-group-size attribute to + // OpenCL Kernel Stub if (getLangOpts().OpenCLVersion <= 120) { // OpenCL v1.2 Work groups are always uniform FuncAttrs.addAttribute("uniform-work-group-size", "true"); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 7fe2d9582178e..de7c577a23493 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5752,6 +5752,12 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { return CGCallee::forDirect(CalleePtr, GD); } +static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) { + if (FD->hasAttr()) + return GlobalDecl(FD, KernelReferenceKind::Stub); + return GlobalDecl(FD); +} + CGCallee CodeGenFunction::EmitCallee(const Expr *E) { E = E->IgnoreParens(); @@ -5765,7 +5771,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { // Resolve direct calls. } else if (auto DRE = dyn_cast(E)) { if (auto FD = dyn_cast(DRE->getDecl())) { - return EmitDirectCallee(*this, FD); + return EmitDirectCallee(*this, getGlobalDeclForDirectCall(FD)); } } else if (auto ME = dyn_cast(E)) { if (auto FD = dyn_cast(ME->getMemberDecl())) { @@ -6134,6 +6140,10 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const auto *FnType = cast(PointeeType); + if (const auto *FD = dyn_cast_or_null(TargetDecl); + FD && FD->hasAttr()) + CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType); + // If we are checking indirect calls and this call is indirect, check that the // function pointer is a member of the bit set for the function type. if (SanOpts.has(SanitizerKind::CFIICall) && diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index b55003b9b0bbb..232d48141a0c0 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1595,6 +1595,26 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Implicit copy-assignment gets the same special treatment as implicit // copy-constructors. emitImplicitAssignmentOperatorBody(Args); + } else if (FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) { + CallArgList CallArgs; + for (unsigned i = 0; i < Args.size(); ++i) { + Address ArgAddr = GetAddrOfLocalVar(Args[i]); + QualType ArgQualType = Args[i]->getType(); + RValue ArgRValue = convertTempToRValue(ArgAddr, ArgQualType, Loc); + CallArgs.add(ArgRValue, ArgQualType); + } + GlobalDecl GDStub = GlobalDecl(FD, KernelReferenceKind::Stub); + const FunctionType *FT = cast(FD->getType()); + CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); + const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( + CallArgs, FT, /*ChainCall=*/false); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::Constant *GDStubFunctionPointer = + CGM.getRawFunctionPointer(GDStub, FTy); + CGCallee GDStubCallee = CGCallee::forDirect(GDStubFunctionPointer, GDStub); + EmitCall(FnInfo, GDStubCallee, ReturnValueSlot(), CallArgs, nullptr, false, + Loc); } else if (Body) { EmitFunctionBody(Body); } else diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8f9cf965af2b9..0154799498f5e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1903,6 +1903,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); + } else if (FD && FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + Out << "__clang_ocl_kern_imp_" << II->getName(); } else { Out << II->getName(); } @@ -3890,6 +3893,9 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { + if (FD->hasAttr() && FD->doesThisDeclarationHaveABody()) + addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub)); + // Update deferred annotations with the latest declaration if the function // function was already used or defined. if (FD->hasAttr()) { @@ -4857,6 +4863,11 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, if (!Ty) { const auto *FD = cast(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); + if (FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + Ty = getTypes().GetFunctionType(FI); + } } // Devirtualized destructor calls may come through here instead of via diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 5aebf9a212237..307048bcc510d 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -207,7 +207,7 @@ class CodeGenTypes { /// Free functions are functions that are compatible with an ordinary /// C function pointer type. - const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD); + const CGFunctionInfo &arrangeFunctionDeclaration(const GlobalDecl GD); const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, const FunctionType *Ty, bool ChainCall); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 64a9a5554caf7..981488eb4dc37 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -117,6 +117,12 @@ unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } +void TargetCodeGenInfo::setOCLKernelStubCallingConvention( + const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_C)); +} + llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const { return llvm::ConstantPointerNull::get(T); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 5df19fbef1e5b..ef4e500f7a38c 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -400,7 +400,7 @@ class TargetCodeGenInfo { virtual bool shouldEmitDWARFBitFieldSeparators() const { return false; } virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} - + virtual void setOCLKernelStubCallingConvention(const FunctionType *&FT) const; /// Return the device-side type for the CUDA device builtin surface type. virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const { // By default, no change from the original one. diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 225d9dfbd980b..f35c124f50aa0 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -58,6 +58,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Type *getSPIRVImageTypeFromHLSLResource( const HLSLAttributedResourceType::Attributes &attributes, llvm::Type *ElementType, llvm::LLVMContext &Ctx) const; + void + setOCLKernelStubCallingConvention(const FunctionType *&FT) const override; }; class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { public: @@ -230,6 +232,12 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } } +void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention( + const FunctionType *&FT) const { + FT = getABIInfo().getContext().adjustFunctionType( + FT, FT->getExtInfo().withCallingConv(CC_SpirFunction)); +} + LangAS SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index effdeb9546800..789aae7a5c34c 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=X86 %s // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN %s // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN20 %s @@ -46,7 +46,77 @@ struct LargeStructTwoMember { struct LargeStructOneMember g_s; #endif -// +Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { + Mat4X4 out; + return out; +} + +kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { + out[0] = foo(in[1]); +} + +Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { + Mat64X64 out; + return out; +} + +kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { + out[0] = foo_large(in[1]); +} + +void FuncOneMember(struct StructOneMember u) { + u.x = (int2)(0, 0); +} + +void FuncOneLargeMember(struct LargeStructOneMember u) { + u.x[0] = (int2)(0, 0); +} + +#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) + +void test_indirect_arg_globl(void) { + FuncOneLargeMember(g_s); +} +#endif + +kernel void test_indirect_arg_local(void) { + local struct LargeStructOneMember l_s; + FuncOneLargeMember(l_s); +} + +void test_indirect_arg_private(void) { + struct LargeStructOneMember p_s; + FuncOneLargeMember(p_s); +} + +kernel void KernelOneMember(struct StructOneMember u) { + FuncOneMember(u); +} + +kernel void KernelOneMemberSpir(global struct StructOneMember* u) { + FuncOneMember(*u); +} + +kernel void KernelLargeOneMember(struct LargeStructOneMember u) { + FuncOneLargeMember(u); +} + +void FuncTwoMember(struct StructTwoMember u) { + u.y = (int2)(0, 0); +} + +void FuncLargeTwoMember(struct LargeStructTwoMember u) { + u.y[0] = (int2)(0, 0); +} + +kernel void KernelTwoMember(struct StructTwoMember u) { + FuncTwoMember(u); +} + +kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { + FuncLargeTwoMember(u); +} + // X86-LABEL: define void @foo( // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { // X86-NEXT: [[ENTRY:.*:]] @@ -54,64 +124,25 @@ struct LargeStructOneMember g_s; // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 // X86-NEXT: ret void // -// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr -// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 -// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 -// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// SPIR-LABEL: define dso_local spir_func void @foo( -// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo( -// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 -// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 -// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] -// -Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { - Mat4X4 out; - return out; -} - // // X86-LABEL: define spir_kernel void @ker( // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3:[0-9]+]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_ker( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 // X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 @@ -121,118 +152,10 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) -// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) // X86-NEXT: ret void // -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_kernel void @ker( -// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 -// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) -// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]] -// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN30-GVAR-NEXT: ret void -// -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker( -// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) -// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 -// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) -// AMDGCN30-NEXT: ret void -// -kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { - out[0] = foo(in[1]); -} - // // X86-LABEL: define void @foo_large( // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { @@ -241,48 +164,22 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 // X86-NEXT: ret void // -// AMDGCN-LABEL: define dso_local void @foo_large( -// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN-NEXT: ret void // -// AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN20-NEXT: ret void +// X86-LABEL: define spir_kernel void @ker_large( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3]] +// X86-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_func void @foo_large( -// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN30-GVAR-NEXT: ret void -// -// AMDGCN30-LABEL: define dso_local void @foo_large( -// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) -// AMDGCN30-NEXT: ret void -// -Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { - Mat64X64 out; - return out; -} - // -// X86-LABEL: define spir_kernel void @ker_large( -// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// X86-LABEL: define void @__clang_ocl_kern_imp_ker_large( +// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: [[ENTRY:.*:]] // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 @@ -299,11 +196,243 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) // X86-NEXT: ret void // +// +// X86-LABEL: define void @FuncOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @FuncOneLargeMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @test_indirect_arg_local( +// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_test_indirect_arg_local( +// X86-SAME: ) #[[ATTR0]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @test_indirect_arg_private( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @KernelOneMemberSpir( +// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 +// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @FuncTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @FuncLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// X86-NEXT: [[ENTRY:.*:]] +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] +// X86-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]] +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @foo_large( +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN-NEXT: ret void +// +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 @@ -313,71 +442,1150 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr -// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) -// AMDGCN20-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_kernel void @ker_large( -// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 -// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 -// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) -// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) -// SPIR-NEXT: ret void +// AMDGCN-LABEL: define dso_local void @FuncOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN-SAME: ) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0 +// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 +// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_ker( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]] +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @foo_large( +// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @FuncOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local( +// AMDGCN20-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN20-SAME: ) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 800, i1 false) +// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 +// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 +// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 +// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) +// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// AMDGCN20-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN20-NEXT: [[ENTRY:.*:]] +// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 480, i1 false) +// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN20-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @foo( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @ker( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3:[0-9]+]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_ker( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false) +// SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4:[0-9]+]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @foo_large( +// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @ker_large( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_ker_large( +// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0 +// SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4 +// SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false) +// SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] +// SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @FuncOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local( +// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_test_indirect_arg_local( +// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( +// SPIR-SAME: ) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_KernelOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir( +// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8 +// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 +// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 +// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) +// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_KernelLargeOneMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_KernelTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// SPIR-NEXT: ret void +// +// +// SPIR-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { +// SPIR-NEXT: [[ENTRY:.*:]] +// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// SPIR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_ker( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl( +// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( +// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local( +// AMDGCN30-GVAR-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private( +// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-GVAR-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] +// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-GVAR-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_ker( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]] +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local void @foo_large( +// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large( // AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] // AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 @@ -387,77 +1595,10 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN30-NEXT: ret void // -kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { - out[0] = foo_large(in[1]); -} - -// -// X86-LABEL: define void @FuncOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local void @FuncOneMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local void @FuncOneMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_func void @FuncOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local void @FuncOneMember( // AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { @@ -472,266 +1613,44 @@ kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { // AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 // AMDGCN30-NEXT: ret void // -void FuncOneMember(struct StructOneMember u) { - u.x = (int2)(0, 0); -} - -// -// X86-LABEL: define void @FuncOneLargeMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember( // AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 -// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-NEXT: ret void -// -void FuncOneLargeMember(struct LargeStructOneMember u) { - u.x[0] = (int2)(0, 0); -} - -#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) -// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl( -// AMDGCN20-SAME: ) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl( -// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void -// -void test_indirect_arg_globl(void) { - FuncOneLargeMember(g_s); -} -#endif - -// -// X86-LABEL: define spir_kernel void @test_indirect_arg_local( -// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4 -// X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local( -// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false) -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: ret void +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( -// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( // AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]] // AMDGCN30-NEXT: ret void // -kernel void test_indirect_arg_local(void) { - local struct LargeStructOneMember l_s; - FuncOneLargeMember(l_s); -} - -// -// X86-LABEL: define void @test_indirect_arg_private( -// X86-SAME: ) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN-SAME: ) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN20-SAME: ) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private( -// SPIR-SAME: ) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// SPIR-NEXT: ret void +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local( +// AMDGCN30-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private( -// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private( // AMDGCN30-SAME: ) #[[ATTR0]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] // AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // -void test_indirect_arg_private(void) { - struct LargeStructOneMember p_s; - FuncOneLargeMember(p_s); -} - -// -// X86-LABEL: define spir_kernel void @KernelOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 -// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( // AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { @@ -741,69 +1660,21 @@ void test_indirect_arg_private(void) { // AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // -kernel void KernelOneMember(struct StructOneMember u) { - FuncOneMember(u); -} - -// -// X86-LABEL: define spir_kernel void @KernelOneMemberSpir( -// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4 -// X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 -// X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 -// X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr -// AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 -// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir( -// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4 -// SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8 -// SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4 -// SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4 -// SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false) -// SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// SPIR-NEXT: ret void +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( -// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( // AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { @@ -811,144 +1682,41 @@ kernel void KernelOneMember(struct StructOneMember u) { // AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 // AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 -// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // -kernel void KernelOneMemberSpir(global struct StructOneMember* u) { - FuncOneMember(*u); -} - -// -// X86-LABEL: define spir_kernel void @KernelLargeOneMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) -// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // -// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( -// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { // AMDGCN30-NEXT: [[ENTRY:.*:]] -// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-NEXT: ret void -// -kernel void KernelLargeOneMember(struct LargeStructOneMember u) { - FuncOneLargeMember(u); -} - -// -// X86-LABEL: define void @FuncTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 -// AMDGCN-NEXT: ret void +// AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8 +// AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 +// AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void // -// AMDGCN20-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 -// AMDGCN20-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8 -// SPIR-NEXT: ret void +// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember( -// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local void @FuncTwoMember( // AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { @@ -965,75 +1733,6 @@ kernel void KernelLargeOneMember(struct LargeStructOneMember u) { // AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 // AMDGCN30-NEXT: ret void // -void FuncTwoMember(struct StructTwoMember u) { - u.y = (int2)(0, 0); -} - -// -// X86-LABEL: define void @FuncLargeTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 -// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) -// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 -// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr -// AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr -// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8 -// AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 -// SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 -// SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 -// SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 -// SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8 -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember( -// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 -// AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember( // AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { @@ -1048,74 +1747,6 @@ void FuncTwoMember(struct StructTwoMember u) { // AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 // AMDGCN30-NEXT: ret void // -void FuncLargeTwoMember(struct LargeStructTwoMember u) { - u.y[0] = (int2)(0, 0); -} - -// -// X86-LABEL: define spir_kernel void @KernelTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 -// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -// AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void -// -// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void -// -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( // AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { @@ -1131,67 +1762,25 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 // AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // -kernel void KernelTwoMember(struct StructTwoMember u) { - FuncTwoMember(u); -} - -// -// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( -// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]] -// X86-NEXT: ret void -// -// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN-NEXT: ret void -// -// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN20-NEXT: [[ENTRY:.*:]] -// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr -// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 -// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 -// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) -// AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: ret void // -// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember( -// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] { -// SPIR-NEXT: [[ENTRY:.*:]] -// SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// SPIR-NEXT: ret void +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN30-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void // -// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( -// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { -// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]] -// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 -// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 -// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 -// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 -// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 -// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] -// AMDGCN30-GVAR-NEXT: ret void // // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( // AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { @@ -1203,12 +1792,18 @@ kernel void KernelTwoMember(struct StructTwoMember u) { // AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 // AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 -// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]] +// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] +// AMDGCN30-NEXT: ret void +// +// +// AMDGCN30-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN30-NEXT: [[ENTRY:.*:]] +// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]] // AMDGCN30-NEXT: ret void // -kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { - FuncLargeTwoMember(u); -} //. // X86: [[META4]] = !{i32 1, i32 1} // X86: [[META5]] = !{!"none", !"none"} diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index 2f8ba99a3e416..6dc488c40da7f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 --include-generated-funcs // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s typedef int int2 __attribute__((ext_vector_type(2))); @@ -42,6 +42,78 @@ struct LargeStructOneMember g_s; #endif +Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { + Mat4X4 out; + return out; +} + +// Expect two mem copies: one for the argument "in", and one for +// the return value. + +kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { + out[0] = foo(in[1]); +} + +Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { + Mat64X64 out; + return out; +} + +kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { + out[0] = foo_large(in[1]); +} + +void FuncOneMember(struct StructOneMember u) { + u.x = (int2)(0, 0); +} + +void FuncOneLargeMember(struct LargeStructOneMember u) { + u.x[0] = (int2)(0, 0); +} + +#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) +void test_indirect_arg_globl(void) { + FuncOneLargeMember(g_s); +} +#endif + +kernel void test_indirect_arg_local(void) { + local struct LargeStructOneMember l_s; + FuncOneLargeMember(l_s); +} + +void test_indirect_arg_private(void) { + struct LargeStructOneMember p_s; + FuncOneLargeMember(p_s); +} + +kernel void KernelOneMember(struct StructOneMember u) { + FuncOneMember(u); +} + +kernel void KernelOneMemberSpir(global struct StructOneMember* u) { + FuncOneMember(*u); +} + +kernel void KernelLargeOneMember(struct LargeStructOneMember u) { + FuncOneLargeMember(u); +} + +void FuncTwoMember(struct StructTwoMember u) { + u.y = (int2)(0, 0); +} + +void FuncLargeTwoMember(struct LargeStructTwoMember u) { + u.y[0] = (int2)(0, 0); +} + +kernel void KernelTwoMember(struct StructTwoMember u) { + FuncTwoMember(u); +} + +kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { + FuncLargeTwoMember(u); +} // AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( // AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -54,19 +126,27 @@ struct LargeStructOneMember g_s; // AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4 // AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] // -Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { - Mat4X4 out; - return out; -} - -// Expect two mem copies: one for the argument "in", and one for -// the return value. - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4:[0-9]+]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr @@ -78,17 +158,14 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR4]] // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 // AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) // AMDGCN-NEXT: ret void // -kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { - out[0] = foo(in[1]); -} - +// // AMDGCN-LABEL: define dso_local void @foo_large( // AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -97,16 +174,27 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) // AMDGCN-NEXT: ret void // -Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { - Mat64X64 out; - return out; -} - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr +// AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ker_large(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_ker_large( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr @@ -118,14 +206,11 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // -kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { - out[0] = foo_large(in[1]); -} - +// // AMDGCN-LABEL: define dso_local void @FuncOneMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -141,10 +226,7 @@ kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { // AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8 // AMDGCN-NEXT: ret void // -void FuncOneMember(struct StructOneMember u) { - u.x = (int2)(0, 0); -} - +// // AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -160,37 +242,32 @@ void FuncOneMember(struct StructOneMember u) { // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // -void FuncOneLargeMember(struct LargeStructOneMember u) { - u.x[0] = (int2)(0, 0); -} - -#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables)) +// // AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl( // AMDGCN-SAME: ) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -void test_indirect_arg_globl(void) { - FuncOneLargeMember(g_s); -} -#endif - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local( // AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { // AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_test_indirect_arg_local() #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_test_indirect_arg_local( +// AMDGCN-SAME: ) #[[ATTR2]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void test_indirect_arg_local(void) { - local struct LargeStructOneMember l_s; - FuncOneLargeMember(l_s); -} - +// // AMDGCN-LABEL: define dso_local void @test_indirect_arg_private( // AMDGCN-SAME: ) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -198,14 +275,10 @@ kernel void test_indirect_arg_local(void) { // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr // AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -void test_indirect_arg_private(void) { - struct LargeStructOneMember p_s; - FuncOneLargeMember(p_s); -} - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -215,13 +288,23 @@ void test_indirect_arg_private(void) { // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void KernelOneMember(struct StructOneMember u) { - FuncOneMember(u); -} - +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir( // AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -229,32 +312,48 @@ kernel void KernelOneMember(struct StructOneMember u) { // AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr // AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMemberSpir(ptr addrspace(1) noundef align 8 [[TMP0]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMemberSpir( +// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META14]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr +// AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void KernelOneMemberSpir(global struct StructOneMember* u) { - FuncOneMember(*u); -} - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( // AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 // AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8 -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void KernelLargeOneMember(struct LargeStructOneMember u) { - FuncOneLargeMember(u); -} - +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// // AMDGCN-LABEL: define dso_local void @FuncTwoMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -272,10 +371,7 @@ kernel void KernelLargeOneMember(struct LargeStructOneMember u) { // AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8 // AMDGCN-NEXT: ret void // -void FuncTwoMember(struct StructTwoMember u) { - u.y = (int2)(0, 0); -} - +// // AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -291,10 +387,7 @@ void FuncTwoMember(struct StructTwoMember u) { // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // -void FuncLargeTwoMember(struct LargeStructTwoMember u) { - u.y[0] = (int2)(0, 0); -} - +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( // AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] @@ -310,18 +403,31 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) { // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 // AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void KernelTwoMember(struct StructTwoMember u) { - FuncTwoMember(u); -} - +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( // AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) -// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 @@ -329,13 +435,22 @@ kernel void KernelTwoMember(struct StructTwoMember u) { // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 // AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8 -// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false) -// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5) +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U]], i64 480, i1 false) +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]] // AMDGCN-NEXT: ret void // -kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { - FuncLargeTwoMember(u); -} //. // AMDGCN: [[META4]] = !{i32 1, i32 1} // AMDGCN: [[META5]] = !{!"none", !"none"} diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 6776e2227847e..4d09fc3ffb70b 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -423,7 +423,7 @@ struct_char_arr32 func_ret_struct_char_arr32() return s; } -// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #1 { +// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK: ret i32 0 transparent_u func_transparent_union_ret() { diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index caae5666de29e..773daf53b2746 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --prefix-filecheck-ir-name VAR --version 5 // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefixes=CHECK,NOCPU // // Check no-optnone and target-cpu behavior @@ -70,9 +70,9 @@ kernel void test_target_features_kernel(global int *i) { // CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata" //. // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@callee -// NOCPU-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define dso_local void @callee( +// NOCPU-SAME: i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // NOCPU-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr @@ -88,9 +88,33 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@test -// NOCPU-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define dso_local amdgpu_kernel void @test( +// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { +// NOCPU-NEXT: [[ENTRY:.*:]] +// NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) +// NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// NOCPU-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// NOCPU-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1 +// NOCPU-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR10:[0-9]+]] +// NOCPU-NEXT: ret void +// +// +// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone +// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test( +// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // NOCPU-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) // NOCPU-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -213,10 +237,46 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: ret void // // +// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone +// NOCPU-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel( +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] { +// NOCPU-NEXT: [[ENTRY:.*:]] +// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR10]] +// NOCPU-NEXT: ret void +// +// +// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone +// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel( +// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] { +// NOCPU-NEXT: [[ENTRY:.*:]] +// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 +// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 +// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 +// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) +// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// NOCPU-NEXT: ret void +// +// // NOCPU: Function Attrs: convergent noinline nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke -// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal void @__test_block_invoke( +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7:[0-9]+]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr @@ -233,9 +293,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent nounwind -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_kernel -// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !associated [[META7:![0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel( +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -244,9 +304,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent noinline nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2 -// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal void @__test_block_invoke_2( +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr @@ -269,9 +329,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent nounwind -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel -// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META12:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel( +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -280,9 +340,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent noinline nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3 -// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal void @__test_block_invoke_3( +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR7]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -311,9 +371,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent nounwind -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel -// NOCPU-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !associated [[META13:![0-9]+]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META15:![0-9]+]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META17:![0-9]+]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel( +// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated [[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] !kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 // NOCPU-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr @@ -322,9 +382,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent noinline nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4 -// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal void @__test_block_invoke_4( +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr @@ -335,14 +395,14 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // NOCPU-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 -// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]] +// NOCPU-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR10]] // NOCPU-NEXT: ret void // // // NOCPU: Function Attrs: convergent nounwind -// NOCPU-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel -// NOCPU-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META18:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel( +// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // NOCPU-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -350,34 +410,10 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU-NEXT: ret void // // -// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@test_target_features_kernel -// NOCPU-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space [[META19:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META11]] { -// NOCPU-NEXT: entry: -// NOCPU-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// NOCPU-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// NOCPU-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) -// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) -// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr -// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8 -// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8 -// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4 -// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false) -// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) -// NOCPU-NEXT: ret void -// -// // NOCPU: Function Attrs: convergent noinline nounwind optnone -// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke -// NOCPU-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal void @__test_target_features_kernel_block_invoke( +// NOCPU-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR7]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // NOCPU-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr @@ -389,9 +425,9 @@ kernel void test_target_features_kernel(global int *i) { // // // NOCPU: Function Attrs: convergent nounwind -// NOCPU-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel -// NOCPU-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { -// NOCPU-NEXT: entry: +// NOCPU-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel( +// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated [[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] { +// NOCPU-NEXT: [[ENTRY:.*:]] // NOCPU-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) // NOCPU-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // NOCPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -412,10 +448,12 @@ kernel void test_target_features_kernel(global int *i) { // // // +// +// // GFX900: Function Attrs: convergent norecurse nounwind -// GFX900-LABEL: define {{[^@]+}}@callee -// GFX900-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define dso_local void @callee( +// GFX900-SAME: i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // GFX900-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr @@ -431,9 +469,33 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent norecurse nounwind -// GFX900-LABEL: define {{[^@]+}}@test -// GFX900-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define dso_local amdgpu_kernel void @test( +// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] { +// GFX900-NEXT: [[ENTRY:.*:]] +// GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) +// GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// GFX900-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// GFX900-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr +// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] +// GFX900-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] +// GFX900-NEXT: [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] +// GFX900-NEXT: call void @__clang_ocl_kern_imp_test(ptr addrspace(1) noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]] +// GFX900-NEXT: ret void +// +// +// GFX900: Function Attrs: convergent norecurse nounwind +// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test( +// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // GFX900-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) // GFX900-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -468,14 +530,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr // GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr // GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14:![0-9]+]] -// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]] +// GFX900-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]] // GFX900-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]] // GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7:[0-9]+]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] // GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] // GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]] // GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] @@ -535,12 +597,12 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 5 // GFX900-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: store i64 [[TMP17]], ptr [[BLOCK_CAPTURED19]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR7]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] // GFX900-NEXT: [[TMP18:%.*]] = getelementptr [1 x i64], ptr [[BLOCK_SIZES_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i64 100, ptr [[TMP18]], align 8 // GFX900-NEXT: [[TMP19:%.*]] = call i32 @__enqueue_kernel_varargs(ptr addrspace(1) [[TMP12]], i32 [[TMP13]], ptr addrspace(5) [[VARTMP11]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr [[BLOCK12_ASCAST]], i32 1, ptr [[TMP18]]) -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR7]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK_SIZES]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] // GFX900-NEXT: [[BLOCK_SIZE22:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 0 // GFX900-NEXT: store i32 32, ptr [[BLOCK_SIZE22]], align 8 // GFX900-NEXT: [[BLOCK_ALIGN23:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 1 @@ -559,17 +621,59 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] // GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]] // GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]]) -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] +// GFX900-NEXT: ret void +// +// +// GFX900: Function Attrs: convergent norecurse nounwind +// GFX900-LABEL: define dso_local amdgpu_kernel void @test_target_features_kernel( +// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { +// GFX900-NEXT: [[ENTRY:.*:]] +// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26:![0-9]+]] +// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]] +// GFX900-NEXT: call void @__clang_ocl_kern_imp_test_target_features_kernel(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR8]] +// GFX900-NEXT: ret void +// +// +// GFX900: Function Attrs: convergent norecurse nounwind +// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel( +// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { +// GFX900-NEXT: [[ENTRY:.*:]] +// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr +// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr +// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr +// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr +// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] +// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] +// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] +// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] +// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] +// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]] +// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]] // GFX900-NEXT: ret void // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke -// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5:[0-9]+]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal void @__test_block_invoke( +// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6:[0-9]+]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 @@ -583,9 +687,9 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_kernel -// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META22:![0-9]+]] !kernel_arg_addr_space [[META23:![0-9]+]] !kernel_arg_access_qual [[META24:![0-9]+]] !kernel_arg_type [[META25:![0-9]+]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26:![0-9]+]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel( +// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META28:![0-9]+]] !kernel_arg_addr_space [[META29:![0-9]+]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30:![0-9]+]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -594,9 +698,9 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2 -// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal void @__test_block_invoke_2( +// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 @@ -616,9 +720,9 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel -// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META27:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel( +// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META31:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -627,15 +731,15 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3 -// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR5]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal void @__test_block_invoke_3( +// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR6]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: [[LP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 -// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA28:![0-9]+]] +// GFX900-NEXT: store ptr addrspace(3) [[LP]], ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32:![0-9]+]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 // GFX900-NEXT: [[TMP0:%.*]] = load i8, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA16]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 @@ -648,16 +752,16 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR3]], align 8, !tbaa [[TBAA7]] // GFX900-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[TMP3]], i64 0 // GFX900-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 8, !tbaa [[TBAA3]] -// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA28]] +// GFX900-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr [[LP_ADDR_ASCAST]], align 4, !tbaa [[TBAA32]] // GFX900-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[TMP4]], i64 0 // GFX900-NEXT: store i32 1, ptr addrspace(3) [[ARRAYIDX5]], align 4, !tbaa [[TBAA17]] // GFX900-NEXT: ret void // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel -// GFX900-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !associated [[META29:![0-9]+]] !kernel_arg_addr_space [[META30:![0-9]+]] !kernel_arg_access_qual [[META31:![0-9]+]] !kernel_arg_type [[META32:![0-9]+]] !kernel_arg_base_type [[META32]] !kernel_arg_type_qual [[META33:![0-9]+]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel( +// GFX900-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR6]] !associated [[META33:![0-9]+]] !kernel_arg_addr_space [[META34:![0-9]+]] !kernel_arg_access_qual [[META35:![0-9]+]] !kernel_arg_type [[META36:![0-9]+]] !kernel_arg_base_type [[META36]] !kernel_arg_type_qual [[META37:![0-9]+]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // GFX900-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 // GFX900-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(5) [[TMP2]] to ptr @@ -666,9 +770,9 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4 -// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal void @__test_block_invoke_4( +// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 @@ -676,14 +780,14 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8, !tbaa [[TBAA3]] // GFX900-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8, !tbaa [[TBAA7]] -// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8:[0-9]+]] +// GFX900-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR8]] // GFX900-NEXT: ret void // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel -// GFX900-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !associated [[META34:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel( +// GFX900-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR6]] !associated [[META38:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // GFX900-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -691,40 +795,10 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: ret void // // -// GFX900: Function Attrs: convergent norecurse nounwind -// GFX900-LABEL: define {{[^@]+}}@test_target_features_kernel -// GFX900-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META35:![0-9]+]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META36:![0-9]+]] !kernel_arg_base_type [[META36]] !kernel_arg_type_qual [[META26]] { -// GFX900-NEXT: entry: -// GFX900-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// GFX900-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) -// GFX900-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) -// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) -// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) -// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr -// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr -// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr -// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr -// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr -// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA37:![0-9]+]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]] -// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]] -// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() -// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]] -// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]] -// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]] -// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR7]] -// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR7]] -// GFX900-NEXT: ret void -// -// // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke -// GFX900-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR5]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal void @__test_target_features_kernel_block_invoke( +// GFX900-SAME: ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR6]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // GFX900-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]] to ptr // GFX900-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR_ASCAST]], align 8 @@ -733,9 +807,9 @@ kernel void test_target_features_kernel(global int *i) { // // // GFX900: Function Attrs: convergent nounwind -// GFX900-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel -// GFX900-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !associated [[META39:![0-9]+]] !kernel_arg_addr_space [[META23]] !kernel_arg_access_qual [[META24]] !kernel_arg_type [[META25]] !kernel_arg_base_type [[META25]] !kernel_arg_type_qual [[META26]] { -// GFX900-NEXT: entry: +// GFX900-LABEL: define internal amdgpu_kernel void @__test_target_features_kernel_block_invoke_kernel( +// GFX900-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR6]] !associated [[META39:![0-9]+]] !kernel_arg_addr_space [[META29]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META30]] !kernel_arg_base_type [[META30]] !kernel_arg_type_qual [[META25]] { +// GFX900-NEXT: [[ENTRY:.*:]] // GFX900-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) // GFX900-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 // GFX900-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr @@ -746,22 +820,25 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// NOCPU: attributes #[[ATTR4]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// NOCPU: attributes #[[ATTR6]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } -// NOCPU: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -// NOCPU: attributes #[[ATTR8]] = { convergent nounwind } +// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } +// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } +// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR8]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// NOCPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } +// NOCPU: attributes #[[ATTR10]] = { convergent nounwind } //. // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } // GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } -// GFX900: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// GFX900: attributes #[[ATTR5]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } -// GFX900: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -// GFX900: attributes #[[ATTR7]] = { nounwind } +// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } // GFX900: attributes #[[ATTR8]] = { convergent nounwind } +// GFX900: attributes #[[ATTR9]] = { nounwind } //. // NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} @@ -770,20 +847,20 @@ kernel void test_target_features_kernel(global int *i) { // NOCPU: [[META4]] = !{!"none", !"none", !"none", !"none"} // NOCPU: [[META5]] = !{!"char*", !"char", !"long*", !"long"} // NOCPU: [[META6]] = !{!"", !"", !"", !""} -// NOCPU: [[META7]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} -// NOCPU: [[META8]] = !{i32 0} -// NOCPU: [[META9]] = !{!"none"} -// NOCPU: [[META10]] = !{!"__block_literal"} -// NOCPU: [[META11]] = !{!""} -// NOCPU: [[META12]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} -// NOCPU: [[META13]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} -// NOCPU: [[META14]] = !{i32 0, i32 3} -// NOCPU: [[META15]] = !{!"none", !"none"} -// NOCPU: [[META16]] = !{!"__block_literal", !"void*"} -// NOCPU: [[META17]] = !{!"", !""} -// NOCPU: [[META18]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle} -// NOCPU: [[META19]] = !{i32 1} -// NOCPU: [[META20]] = !{!"int*"} +// NOCPU: [[META7]] = !{i32 1} +// NOCPU: [[META8]] = !{!"none"} +// NOCPU: [[META9]] = !{!"int*"} +// NOCPU: [[META10]] = !{!""} +// NOCPU: [[META11]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} +// NOCPU: [[META12]] = !{i32 0} +// NOCPU: [[META13]] = !{!"__block_literal"} +// NOCPU: [[META14]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} +// NOCPU: [[META15]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} +// NOCPU: [[META16]] = !{i32 0, i32 3} +// NOCPU: [[META17]] = !{!"none", !"none"} +// NOCPU: [[META18]] = !{!"__block_literal", !"void*"} +// NOCPU: [[META19]] = !{!"", !""} +// NOCPU: [[META20]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle} // NOCPU: [[META21]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle} //. // GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} @@ -808,23 +885,23 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} // GFX900: [[META20]] = !{!"queue_t", [[META5]], i64 0} // GFX900: [[TBAA_STRUCT21]] = !{i64 0, i64 4, [[TBAA17]]} -// GFX900: [[META22]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} -// GFX900: [[META23]] = !{i32 0} -// GFX900: [[META24]] = !{!"none"} -// GFX900: [[META25]] = !{!"__block_literal"} -// GFX900: [[META26]] = !{!""} -// GFX900: [[META27]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} -// GFX900: [[TBAA28]] = !{[[META9]], [[META9]], i64 0} -// GFX900: [[META29]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} -// GFX900: [[META30]] = !{i32 0, i32 3} -// GFX900: [[META31]] = !{!"none", !"none"} -// GFX900: [[META32]] = !{!"__block_literal", !"void*"} -// GFX900: [[META33]] = !{!"", !""} -// GFX900: [[META34]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle} -// GFX900: [[META35]] = !{i32 1} -// GFX900: [[META36]] = !{!"int*"} -// GFX900: [[TBAA37]] = !{[[META38:![0-9]+]], [[META38]], i64 0} -// GFX900: [[META38]] = !{!"p1 int", [[META9]], i64 0} +// GFX900: [[META22]] = !{i32 1} +// GFX900: [[META23]] = !{!"none"} +// GFX900: [[META24]] = !{!"int*"} +// GFX900: [[META25]] = !{!""} +// GFX900: [[TBAA26]] = !{[[META27:![0-9]+]], [[META27]], i64 0} +// GFX900: [[META27]] = !{!"p1 int", [[META9]], i64 0} +// GFX900: [[META28]] = !{ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle} +// GFX900: [[META29]] = !{i32 0} +// GFX900: [[META30]] = !{!"__block_literal"} +// GFX900: [[META31]] = !{ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle} +// GFX900: [[TBAA32]] = !{[[META9]], [[META9]], i64 0} +// GFX900: [[META33]] = !{ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle} +// GFX900: [[META34]] = !{i32 0, i32 3} +// GFX900: [[META35]] = !{!"none", !"none"} +// GFX900: [[META36]] = !{!"__block_literal", !"void*"} +// GFX900: [[META37]] = !{!"", !""} +// GFX900: [[META38]] = !{ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle} // GFX900: [[META39]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle} //. //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl index edf6dbf8657cb..33fee66d6e9fc 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-printf.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -1,43 +1,86 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4 // RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); -// CHECK-LABEL: @test_printf_noargs( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR4:[0-9]+]] -// CHECK-NEXT: ret void -// __kernel void test_printf_noargs() { printf(""); } -// CHECK-LABEL: @test_printf_int( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR4]] -// CHECK-NEXT: ret void -// __kernel void test_printf_int(int i) { printf("%d", i); } -// CHECK-LABEL: @test_printf_str_int( +__kernel void test_printf_str_int(int i) { + char s[] = "foo"; + printf("%s:%d", s, i); +} +// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_noargs( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_noargs() #[[ATTR5:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_noargs( +// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_int( +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5:![0-9]+]] !kernel_arg_access_qual [[META6:![0-9]+]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_int(i32 noundef [[TMP0]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_int( +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.1, i32 noundef [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local amdgpu_kernel void @test_printf_str_int( +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_test_printf_str_int(i32 noundef [[TMP0]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_test_printf_str_int( +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META5]] !kernel_arg_access_qual [[META6]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META8]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) -// CHECK-NEXT: store i32 [[I:%.*]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: store i32 [[I]], ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7:[0-9]+]] // CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 1 [[S]], ptr addrspace(4) align 1 @__const.test_printf_str_int.s, i64 4, i1 false) // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(5) [[S]], i64 0, i64 0 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]] -// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR5]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[I_ADDR]], align 4, !tbaa [[TBAA9]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str.2, ptr addrspace(5) noundef [[ARRAYDECAY]], i32 noundef [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[S]]) #[[ATTR7]] // CHECK-NEXT: ret void // -__kernel void test_printf_str_int(int i) { - char s[] = "foo"; - printf("%s:%d", s, i); -} +//. +// CHECK: [[META4]] = !{} +// CHECK: [[META5]] = !{i32 0} +// CHECK: [[META6]] = !{!"none"} +// CHECK: [[META7]] = !{!"int"} +// CHECK: [[META8]] = !{!""} +// CHECK: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// CHECK: [[META10]] = !{!"int", [[META11:![0-9]+]], i64 0} +// CHECK: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} +// CHECK: [[META12]] = !{!"Simple C/C++ TBAA"} +//. diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl index d139621ede4e7..5f32231b18c3d 100644 --- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl @@ -4,14 +4,20 @@ // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM kernel void ker() {}; -// CHECK: define{{.*}}@ker() #0 +// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]] +// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]] + +// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]] void foo() {}; -// CHECK: define{{.*}}@foo() #1 +// CHECK: define{{.*}}@foo() #[[ATTR1:[0-9]+]] -// CHECK-LABEL: attributes #0 +// CHECK: attributes #[[ATTR0]] // CHECK-UNIFORM: "uniform-work-group-size"="true" // CHECK-NONUNIFORM: "uniform-work-group-size"="false" -// CHECK-LABEL: attributes #1 +// CHECK: attributes #[[ATTR1]] +// CHECK-NOT: uniform-work-group-size + +// CHECK: attributes #[[ATTR2]] // CHECK-NOT: uniform-work-group-size diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl index 451d30b4d86f0..ea1f734391614 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --include-generated-funcs --version 5 // RUN: %clang_cc1 -fno-ident -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" -fdenormal-fp-math-f32=preserve-sign -cl-uniform-work-group-size | FileCheck --check-prefix=SPIR32 %s // RUN: %clang_cc1 -fno-ident -ffp-exception-behavior=strict -fexperimental-strict-floating-point -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck --check-prefix=STRICTFP %s @@ -21,9 +21,26 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { }); } // SPIR32: Function Attrs: convergent noinline norecurse nounwind optnone -// SPIR32-LABEL: define {{[^@]+}}@device_side_enqueue -// SPIR32-SAME: (ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] { -// SPIR32-NEXT: entry: +// SPIR32-LABEL: define dso_local spir_kernel void @device_side_enqueue( +// SPIR32-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] { +// SPIR32-NEXT: [[ENTRY:.*:]] +// SPIR32-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR32-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR32-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 4 +// SPIR32-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 +// SPIR32-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 4 +// SPIR32-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4 +// SPIR32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// SPIR32-NEXT: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 [[TMP0]], ptr addrspace(1) align 4 [[TMP1]], i32 [[TMP2]]) #[[ATTR5:[0-9]+]] +// SPIR32-NEXT: ret void +// +// +// SPIR32: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR32-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_device_side_enqueue( +// SPIR32-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META2]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5]] { +// SPIR32-NEXT: [[ENTRY:.*:]] // SPIR32-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // SPIR32-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // SPIR32-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -60,9 +77,9 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // // // SPIR32: Function Attrs: convergent noinline nounwind optnone -// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke -// SPIR32-SAME: (ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { -// SPIR32-NEXT: entry: +// SPIR32-LABEL: define internal spir_func void @__device_side_enqueue_block_invoke( +// SPIR32-SAME: ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SPIR32-NEXT: [[ENTRY:.*:]] // SPIR32-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr addrspace(4), align 4 // SPIR32-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr addrspace(4), align 4 // SPIR32-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 @@ -84,17 +101,34 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // // // SPIR32: Function Attrs: convergent nounwind -// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel -// SPIR32-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { -// SPIR32-NEXT: entry: +// SPIR32-LABEL: define spir_kernel void @__device_side_enqueue_block_invoke_kernel( +// SPIR32-SAME: ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { +// SPIR32-NEXT: [[ENTRY:.*:]] // SPIR32-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) // SPIR32-NEXT: ret void // // // STRICTFP: Function Attrs: convergent noinline norecurse nounwind optnone strictfp -// STRICTFP-LABEL: define {{[^@]+}}@device_side_enqueue -// STRICTFP-SAME: (ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] { -// STRICTFP-NEXT: entry: +// STRICTFP-LABEL: define dso_local spir_kernel void @device_side_enqueue( +// STRICTFP-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] !kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] { +// STRICTFP-NEXT: [[ENTRY:.*:]] +// STRICTFP-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// STRICTFP-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// STRICTFP-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// STRICTFP-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 4 +// STRICTFP-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 4 +// STRICTFP-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 +// STRICTFP-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 4 +// STRICTFP-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4 +// STRICTFP-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// STRICTFP-NEXT: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 [[TMP0]], ptr addrspace(1) align 4 [[TMP1]], i32 [[TMP2]]) #[[ATTR5:[0-9]+]] +// STRICTFP-NEXT: ret void +// +// +// STRICTFP: Function Attrs: convergent noinline norecurse nounwind optnone strictfp +// STRICTFP-LABEL: define dso_local spir_func void @__clang_ocl_kern_imp_device_side_enqueue( +// STRICTFP-SAME: ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META2]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5]] { +// STRICTFP-NEXT: [[ENTRY:.*:]] // STRICTFP-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // STRICTFP-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // STRICTFP-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -109,7 +143,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: store i32 0, ptr [[FLAGS]], align 4 // STRICTFP-NEXT: [[TMP0:%.*]] = load target("spirv.Queue"), ptr [[DEFAULT_QUEUE]], align 4 // STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS]], align 4 -// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) #[[ATTR5:[0-9]+]] +// STRICTFP-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) #[[ATTR6:[0-9]+]] // STRICTFP-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 0 // STRICTFP-NEXT: store i32 24, ptr [[BLOCK_SIZE]], align 4 // STRICTFP-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 1 @@ -126,14 +160,14 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4 // STRICTFP-NEXT: store ptr addrspace(1) [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 4 // STRICTFP-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[BLOCK]] to ptr addrspace(4) -// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) #[[ATTR5]] +// STRICTFP-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(target("spirv.Queue") [[TMP0]], i32 [[TMP1]], ptr [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) #[[ATTR6]] // STRICTFP-NEXT: ret void // // // STRICTFP: Function Attrs: convergent noinline nounwind optnone strictfp -// STRICTFP-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke -// STRICTFP-SAME: (ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { -// STRICTFP-NEXT: entry: +// STRICTFP-LABEL: define internal spir_func void @__device_side_enqueue_block_invoke( +// STRICTFP-SAME: ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// STRICTFP-NEXT: [[ENTRY:.*:]] // STRICTFP-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr addrspace(4), align 4 // STRICTFP-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr addrspace(4), align 4 // STRICTFP-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 @@ -144,7 +178,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // STRICTFP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4 // STRICTFP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP0]], i32 [[TMP1]] // STRICTFP-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX]], align 4 -// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR5]] +// STRICTFP-NEXT: [[TMP3:%.*]] = call float @llvm.experimental.constrained.fmuladd.f32(float 4.000000e+00, float [[TMP2]], float 1.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR6]] // STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 // STRICTFP-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4 // STRICTFP-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 @@ -155,10 +189,10 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // // // STRICTFP: Function Attrs: convergent nounwind -// STRICTFP-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel -// STRICTFP-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { -// STRICTFP-NEXT: entry: -// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) #[[ATTR5]] +// STRICTFP-LABEL: define spir_kernel void @__device_side_enqueue_block_invoke_kernel( +// STRICTFP-SAME: ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { +// STRICTFP-NEXT: [[ENTRY:.*:]] +// STRICTFP-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) #[[ATTR6]] // STRICTFP-NEXT: ret void // //. @@ -167,13 +201,15 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) { // SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } // SPIR32: attributes #[[ATTR4]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size"="true" } //. // STRICTFP: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone strictfp "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } // STRICTFP: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // STRICTFP: attributes #[[ATTR2]] = { convergent noinline nounwind optnone strictfp "stack-protector-buffer-size"="8" } // STRICTFP: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind strictfp willreturn memory(inaccessiblemem: readwrite) } // STRICTFP: attributes #[[ATTR4]] = { convergent nounwind "stack-protector-buffer-size"="8" } -// STRICTFP: attributes #[[ATTR5]] = { strictfp } +// STRICTFP: attributes #[[ATTR5]] = { convergent nounwind strictfp "uniform-work-group-size"="false" } +// STRICTFP: attributes #[[ATTR6]] = { strictfp } //. // SPIR32: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // SPIR32: [[META1:![0-9]+]] = !{i32 2, i32 0} diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index a1408d38a44c9..3355fe1c25819 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86 -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86 -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86 #pragma OPENCL EXTENSION cl_khr_subgroups : enable @@ -39,7 +39,13 @@ void callee(int id, __global int *out) { out[id] = id; } -// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) +// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) +// TRIPLESPIR: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue({{.*}}) + +// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) +// TRIPLEX86: call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}}) + +// COMMON-LABEL: define{{.*}} void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) kernel void device_side_enqueue(global int *a, global int *b, int i) { // SPIR: %default_queue = alloca target("spirv.Queue") // X86: %default_queue = alloca ptr diff --git a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl index b1e45e6d6e6dc..e741cf63f30b5 100644 --- a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl +++ b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl @@ -9,8 +9,15 @@ typedef struct {int a;} ndrange_t; kernel void test(int i) { + // AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test +// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0) + // SPIR-LABEL: define {{.*}} spir_kernel void @test +// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0) + +// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test +// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test // COMMON-LABEL: entry: // AMDGPU: %block_sizes = alloca [1 x i64] @@ -36,6 +43,6 @@ kernel void test(int i) { // Check that the temporary is scoped to the `if` // CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "" -// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", {{.*}} file: ![[TESTFILE]] -// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26) -// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]]) +// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]] +// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33) +// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]]) diff --git a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl index 4a7bb8227c339..86ca9ae509073 100644 --- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl +++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // Check there's no assertion when passing a pointer to an address space @@ -8,57 +8,66 @@ extern void private_ptr(__private int *); extern void local_ptr(__local int *); extern void generic_ptr(__generic int *); +void use_of_private_var() +{ + int x = 0 ; + private_ptr(&x); + generic_ptr(&x); +} + +void addr_of_arg(int x) +{ + private_ptr(&x); + generic_ptr(&x); +} + +__kernel void use_of_local_var() +{ + __local int x; + local_ptr(&x); + generic_ptr(&x); +} + // CHECK-LABEL: define dso_local void @use_of_private_var( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr -// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]] // CHECK-NEXT: store i32 0, ptr [[X_ASCAST]], align 4, !tbaa [[TBAA4:![0-9]+]] -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5) -// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[TMP0]]) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR5]] -// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR4]] +// CHECK-NEXT: [[X_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5) +// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ASCAST_ASCAST]]) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5]] // CHECK-NEXT: ret void // -void use_of_private_var() -{ - int x = 0 ; - private_ptr(&x); - generic_ptr(&x); -} - +// // CHECK-LABEL: define dso_local void @addr_of_arg( // CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr // CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4, !tbaa [[TBAA4]] -// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5) -// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[TMP0]]) #[[ATTR5]] -// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR5]] +// CHECK-NEXT: [[X_ADDR_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR_ASCAST]] to ptr addrspace(5) +// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ADDR_ASCAST_ASCAST]]) #[[ATTR6]] +// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ADDR_ASCAST]]) #[[ATTR6]] // CHECK-NEXT: ret void // -void addr_of_arg(int x) -{ - private_ptr(&x); - generic_ptr(&x); -} - +// // CHECK-LABEL: define dso_local amdgpu_kernel void @use_of_local_var( // CHECK-SAME: ) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: call void @local_ptr(ptr addrspace(3) noundef @use_of_local_var.x) #[[ATTR5]] -// CHECK-NEXT: call void @generic_ptr(ptr noundef addrspacecast (ptr addrspace(3) @use_of_local_var.x to ptr)) #[[ATTR5]] +// CHECK-NEXT: call void @__clang_ocl_kern_imp_use_of_local_var() #[[ATTR6]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_use_of_local_var( +// CHECK-SAME: ) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: call void @local_ptr(ptr addrspace(3) noundef @use_of_local_var.x) #[[ATTR6]] +// CHECK-NEXT: call void @generic_ptr(ptr noundef addrspacecast (ptr addrspace(3) @use_of_local_var.x to ptr)) #[[ATTR6]] // CHECK-NEXT: ret void // -__kernel void use_of_local_var() -{ - __local int x; - local_ptr(&x); - generic_ptr(&x); -} - //. // CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} // CHECK: [[META5]] = !{!"int", [[META6:![0-9]+]], i64 0} diff --git a/clang/test/CodeGenOpenCL/kernel-arg-info.cl b/clang/test/CodeGenOpenCL/kernel-arg-info.cl index dbb59af9470ce..3a2284223a105 100644 --- a/clang/test/CodeGenOpenCL/kernel-arg-info.cl +++ b/clang/test/CodeGenOpenCL/kernel-arg-info.cl @@ -21,6 +21,7 @@ kernel void foo(global int * globalintp, global int * restrict globalintrestrict *globalintrestrictp = constint + volatileint; } // CHECK: define{{.*}} spir_kernel void @foo{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD11:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD12:[0-9]+]] // CHECK: !kernel_arg_type ![[MD13:[0-9]+]] @@ -32,6 +33,7 @@ kernel void foo(global int * globalintp, global int * restrict globalintrestrict kernel void foo2(read_only image1d_t img1, image2d_t img2, write_only image2d_array_t img3, read_write image1d_t img4) { } // CHECK: define{{.*}} spir_kernel void @foo2{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo2{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD21:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD22:[0-9]+]] // CHECK: !kernel_arg_type ![[MD23:[0-9]+]] @@ -43,6 +45,7 @@ kernel void foo2(read_only image1d_t img1, image2d_t img2, write_only image2d_ar kernel void foo3(__global half * X) { } // CHECK: define{{.*}} spir_kernel void @foo3{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo3{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD31:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD32:[0-9]+]] // CHECK: !kernel_arg_type ![[MD33:[0-9]+]] @@ -55,6 +58,7 @@ typedef unsigned int myunsignedint; kernel void foo4(__global unsigned int * X, __global myunsignedint * Y) { } // CHECK: define{{.*}} spir_kernel void @foo4{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo4{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD42:[0-9]+]] // CHECK: !kernel_arg_type ![[MD43:[0-9]+]] @@ -67,6 +71,7 @@ typedef image1d_t myImage; kernel void foo5(myImage img1, write_only image1d_t img2) { } // CHECK: define{{.*}} spir_kernel void @foo5{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo5{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD41:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD51:[0-9]+]] // CHECK: !kernel_arg_type ![[MD52:[0-9]+]] @@ -77,6 +82,8 @@ kernel void foo5(myImage img1, write_only image1d_t img2) { typedef char char16 __attribute__((ext_vector_type(16))); __kernel void foo6(__global char16 arg[]) {} +// CHECK: define{{.*}} spir_kernel void @foo6{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo6{{[^!]+}} // CHECK: !kernel_arg_type ![[MD61:[0-9]+]] // ARGINFO: !kernel_arg_name ![[MD62:[0-9]+]] @@ -87,6 +94,7 @@ kernel void foo7(ROImage ro, WOImage wo, RWImage rw) { } // CHECK: define{{.*}} spir_kernel void @foo7{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo7{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[MD71:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD72:[0-9]+]] // CHECK: !kernel_arg_type ![[MD73:[0-9]+]] @@ -99,6 +107,7 @@ typedef unsigned char uchar; typedef uchar uchar2 __attribute__((ext_vector_type(2))); kernel void foo8(pipe int p1, pipe uchar p2, pipe uchar2 p3, const pipe uchar p4, write_only pipe uchar p5) {} // CHECK: define{{.*}} spir_kernel void @foo8{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo8{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[PIPE_AS_QUAL:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[PIPE_ACCESS_QUAL:[0-9]+]] // CHECK: !kernel_arg_type ![[PIPE_TY:[0-9]+]] @@ -109,6 +118,7 @@ kernel void foo8(pipe int p1, pipe uchar p2, pipe uchar2 p3, const pipe uchar p4 kernel void foo9(signed char sc1, global const signed char* sc2) {} // CHECK: define{{.*}} spir_kernel void @foo9{{[^!]+}} +// ARGINFO: define{{.*}} spir_kernel void @foo9{{[^!]+}} // CHECK: !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]] // CHECK: !kernel_arg_access_qual ![[MD42]] // CHECK: !kernel_arg_type ![[SCHAR_TY:[0-9]+]] diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl new file mode 100644 index 0000000000000..cdbe510b723b9 --- /dev/null +++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl @@ -0,0 +1,959 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 4 +// RUN: %clang_cc1 -O0 -triple i686-pc-darwin -emit-llvm -o - %s | FileCheck -check-prefix=X86 %s +// RUN: %clang_cc1 -O0 -triple amdgcn -emit-llvm -o - %s | FileCheck -check-prefix=AMDGCN %s + +#pragma OPENCL EXTENSION __cl_clang_function_pointers : enable + +typedef int int2 __attribute__((ext_vector_type(2))); + +typedef struct { + int cells[9]; +} Mat3X3; + +typedef struct { + int cells[16]; +} Mat4X4; + +typedef struct { + int cells[1024]; +} Mat32X32; + +typedef struct { + int cells[4096]; +} Mat64X64; + +struct StructOneMember { + int2 x; +}; + +struct StructTwoMember { + int2 x; + int2 y; +}; + +struct LargeStructOneMember { + int2 x[100]; +}; + +struct LargeStructTwoMember { + int2 x[40]; + int2 y[20]; +}; + +Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { + Mat4X4 out; + return out; +} + +Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { + Mat64X64 out; + return out; +} + +void FuncOneMember(struct StructOneMember u) { + u.x = (int2)(0, 0); +} + +void FuncOneLargeMember(struct LargeStructOneMember u) { + u.x[0] = (int2)(0, 0); +} + +void FuncTwoMember(struct StructTwoMember u) { + u.y = (int2)(0, 0); +} + +void FuncLargeTwoMember(struct LargeStructTwoMember u) { + u.y[0] = (int2)(0, 0); +} + +__attribute__((noinline)) kernel void callee_kern(global int *A){ + *A = 1; +} + +kernel void callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out) { + out[0] = foo(in[1]); +} + +kernel void callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out) { + out[0] = foo_large(in[1]); +} + +kernel void KernelOneMember(struct StructOneMember u) { + FuncOneMember(u); +} + +kernel void KernelLargeOneMember(struct LargeStructOneMember u) { + FuncOneLargeMember(u); +} + +kernel void KernelTwoMember(struct StructTwoMember u) { + FuncTwoMember(u); +} + +kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { + FuncLargeTwoMember(u); +} + +__attribute__((noinline)) kernel void ext_callee_kern(global int *A); + +kernel void ext_callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out); + +kernel void ext_callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out); + +kernel void ext_KernelOneMember(struct StructOneMember u); + +kernel void ext_KernelLargeOneMember(struct LargeStructOneMember u); + +kernel void ext_KernelTwoMember(struct StructTwoMember u); + +kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u); + +kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){ + callee_kern(A); + ext_callee_kern(A); + + callee_kern_Mat3X3(mat3X3, mat4X4); + callee_kern_Mat32X32(mat32X32, mat64X64); + ext_callee_kern_Mat3X3(mat3X3, mat4X4); + ext_callee_kern_Mat32X32(mat32X32, mat64X64); +} + +kernel void caller_kern2(struct StructOneMember structOneMem, global struct StructOneMember* global_structOneMem, struct StructTwoMember structTwoMem){ + KernelOneMember(structOneMem); + ext_KernelOneMember(structOneMem); + KernelTwoMember(structTwoMem); + ext_KernelTwoMember(structTwoMem); +} + +kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct LargeStructTwoMember largeStructTwoMem){ + KernelLargeOneMember(largeStructOneMem); + KernelLargeTwoMember(largeStructTwoMem); + ext_KernelLargeOneMember(largeStructOneMem); + ext_KernelLargeTwoMember(largeStructTwoMem); +} +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @foo( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @foo_large( +// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncOneLargeMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @FuncLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 +// X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 +// X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @callee_kern( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4:[0-9]+]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: store i32 1, ptr [[TMP0]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @callee_kern_Mat3X3( +// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat3X3( +// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { +// X86-NEXT: entry: +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 +// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1 +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]] +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @callee_kern_Mat32X32( +// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { +// X86-NEXT: entry: +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat32X32( +// X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { +// X86-NEXT: entry: +// X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 +// X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 +// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 +// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1 +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]] +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) +// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @caller_kern( +// X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern( +// X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4]] +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR4]] +// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]]) #[[ATTR4]] +// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP4]], ptr noundef align 4 [[TMP5]]) #[[ATTR4]] +// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP6]], ptr noundef align 4 [[TMP7]]) #[[ATTR4]] +// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP8]], ptr noundef align 4 [[TMP9]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @caller_kern2( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[STRUCTONEMEM:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[STRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { +// X86-NEXT: entry: +// X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern2( +// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { +// X86-NEXT: entry: +// X86-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 +// X86-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 +// X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false) +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false) +// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @caller_kern3( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[LARGESTRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[LARGESTRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { +// X86-NEXT: entry: +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern3( +// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { +// X86-NEXT: entry: +// X86-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 +// X86-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTONEMEM]], ptr align 4 [[TMP0]], i32 800, i1 false) +// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 480, i1 false) +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( +// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 +// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 +// AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @foo_large( +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 +// AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5:[0-9]+]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat3X3( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat3X3( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat32X32( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat32X32( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( +// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( +// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1 +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]], ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]], ptr addrspace(1) noundef align 4 [[TMP4]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP4]], ptr addrspace(1) noundef align 4 [[TMP5]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP6]], ptr addrspace(1) noundef align 4 [[TMP7]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP8]], ptr addrspace(1) noundef align 4 [[TMP9]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern2( +// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], [[STRUCT_STRUCTTWOMEMBER:%.*]] [[STRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 0 +// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 1 +// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP5]], ptr addrspace(1) noundef align 8 [[TMP4]], <2 x i32> [[TMP7]], <2 x i32> [[TMP9]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern2( +// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE0:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 +// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP2]]) #[[ATTR5]] +// AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE2]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(<2 x i32> [[TMP3]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]] +// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8 +// AMDGCN-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP10]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(<2 x i32> [[TMP9]], <2 x i32> [[TMP11]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern3( +// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[LARGESTRUCTONEMEM_COERCE:%.*]], [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[LARGESTRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[LARGESTRUCTONEMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[LARGESTRUCTONEMEM_COERCE]], 0 +// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8 +// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 0 +// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 1 +// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 1 +// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP5]], ptr addrspace(5) [[TMP4]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern3( +// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) +// AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(5) align 8 [[TMP1]], i64 480, i1 false) +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: ret void +// +//. +// X86: [[META4]] = !{i32 1} +// X86: [[META5]] = !{!"none"} +// X86: [[META6]] = !{!"int*"} +// X86: [[META7]] = !{!""} +// X86: [[META8]] = !{i32 1, i32 1} +// X86: [[META9]] = !{!"none", !"none"} +// X86: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"} +// X86: [[META11]] = !{!"", !""} +// X86: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"} +// X86: [[META13]] = !{i32 0} +// X86: [[META14]] = !{!"struct StructOneMember"} +// X86: [[META15]] = !{!"struct LargeStructOneMember"} +// X86: [[META16]] = !{!"struct StructTwoMember"} +// X86: [[META17]] = !{!"struct LargeStructTwoMember"} +// X86: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1} +// X86: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"} +// X86: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"} +// X86: [[META21]] = !{!"", !"", !"", !"", !""} +// X86: [[META22]] = !{i32 0, i32 1, i32 0} +// X86: [[META23]] = !{!"none", !"none", !"none"} +// X86: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"} +// X86: [[META25]] = !{!"", !"", !""} +// X86: [[META26]] = !{i32 0, i32 0} +// X86: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"} +//. +// AMDGCN: [[META4]] = !{i32 1} +// AMDGCN: [[META5]] = !{!"none"} +// AMDGCN: [[META6]] = !{!"int*"} +// AMDGCN: [[META7]] = !{!""} +// AMDGCN: [[META8]] = !{i32 1, i32 1} +// AMDGCN: [[META9]] = !{!"none", !"none"} +// AMDGCN: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"} +// AMDGCN: [[META11]] = !{!"", !""} +// AMDGCN: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"} +// AMDGCN: [[META13]] = !{i32 0} +// AMDGCN: [[META14]] = !{!"struct StructOneMember"} +// AMDGCN: [[META15]] = !{!"struct LargeStructOneMember"} +// AMDGCN: [[META16]] = !{!"struct StructTwoMember"} +// AMDGCN: [[META17]] = !{!"struct LargeStructTwoMember"} +// AMDGCN: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1} +// AMDGCN: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"} +// AMDGCN: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"} +// AMDGCN: [[META21]] = !{!"", !"", !"", !"", !""} +// AMDGCN: [[META22]] = !{i32 0, i32 1, i32 0} +// AMDGCN: [[META23]] = !{!"none", !"none", !"none"} +// AMDGCN: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"} +// AMDGCN: [[META25]] = !{!"", !"", !""} +// AMDGCN: [[META26]] = !{i32 0, i32 0} +// AMDGCN: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"} +//. diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl index f5b618f6a35d3..4abb40aa3ed50 100644 --- a/clang/test/CodeGenOpenCL/reflect.cl +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -1,6 +1,13 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 4 // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s +bool device_function() { + return __nvvm_reflect("__CUDA_ARCH") >= 700; +} + +__kernel void kernel_function(__global int *i) { + *i = device_function(); +} // CHECK-LABEL: define dso_local zeroext i1 @device_function( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: @@ -8,24 +15,28 @@ // CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700 // CHECK-NEXT: ret i1 [[CMP]] // -bool device_function() { - return __nvvm_reflect("__CUDA_ARCH") >= 700; -} - +// // CHECK-LABEL: define dso_local ptx_kernel void @kernel_function( // CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4 +// CHECK-NEXT: call void @__clang_ocl_kern_imp_kernel_function(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local ptx_kernel void @__clang_ocl_kern_imp_kernel_function( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3]] // CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4 // CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4 // CHECK-NEXT: ret void // -__kernel void kernel_function(__global int *i) { - *i = device_function(); -} //. // CHECK: [[META3]] = !{i32 1} // CHECK: [[META4]] = !{!"none"} diff --git a/clang/test/CodeGenOpenCL/sampler.cl b/clang/test/CodeGenOpenCL/sampler.cl index 3649afba50fe2..227242be73c5e 100644 --- a/clang/test/CodeGenOpenCL/sampler.cl +++ b/clang/test/CodeGenOpenCL/sampler.cl @@ -36,8 +36,12 @@ void fnc4smp(sampler_t s) {} kernel void foo(sampler_t smp_par) { // CHECK-SPIR-LABEL: define{{.*}} spir_kernel void @foo(target("spirv.Sampler") %smp_par) + // CHECK-SPIR: call spir_func void @__clang_ocl_kern_imp_foo(target("spirv.Sampler") %0) + // CHECK-SPIR-LABEL: define{{.*}} spir_func void @__clang_ocl_kern_imp_foo(target("spirv.Sampler") %smp_par) // CHECK-SPIR: [[smp_par_ptr:%[A-Za-z0-9_\.]+]] = alloca target("spirv.Sampler") // CHECK-X86-LABEL: define{{.*}} spir_kernel void @foo(ptr %smp_par) + // CHECK-X86: call void @__clang_ocl_kern_imp_foo(ptr %0) + // CHECK-X86-LABEL: define{{.*}} void @__clang_ocl_kern_imp_foo(ptr %smp_par) // CHECK-X86: [[smp_par_ptr:%[A-Za-z0-9_\.]+]] = alloca ptr // Case 2b diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl index 569ea0cbe1af6..56e1b662a7b4e 100644 --- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl +++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl @@ -4,15 +4,18 @@ int get_dummy_id(int D); kernel void bar(global int *A); +//CHECK: define{{.*}} spir_kernel void @foo(ptr addrspace(1) noundef align 4 %A) +//CHECK: tail call spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A) + kernel void foo(global int *A) -// CHECK: define{{.*}} spir_kernel void @foo(ptr addrspace(1) noundef align 4 %A) +// CHECK: define{{.*}} spir_func void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef align 4 %A) { int id = get_dummy_id(0); // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0) A[id] = id; bar(A); - // CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A) + // CHECK: tail call spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A) } // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef) -// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4) +// CHECK: declare spir_func void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4) diff --git a/clang/test/CodeGenOpenCL/visibility.cl b/clang/test/CodeGenOpenCL/visibility.cl index addfe33377f93..31807aacf8c2a 100644 --- a/clang/test/CodeGenOpenCL/visibility.cl +++ b/clang/test/CodeGenOpenCL/visibility.cl @@ -37,22 +37,33 @@ __attribute__((visibility("protected"))) extern int ext_protected; // FVIS-PROTECTED: @ext_default = external local_unnamed_addr // FVIS-HIDDEN: @ext_default = external local_unnamed_addr __attribute__((visibility("default"))) extern int ext_default; - // FVIS-DEFAULT: define{{.*}} amdgpu_kernel void @kern() // FVIS-PROTECTED: define protected amdgpu_kernel void @kern() // FVIS-HIDDEN: define protected amdgpu_kernel void @kern() +// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern() kernel void kern() {} // FVIS-DEFAULT: define protected amdgpu_kernel void @kern_hidden() // FVIS-PROTECTED: define protected amdgpu_kernel void @kern_hidden() // FVIS-HIDDEN: define protected amdgpu_kernel void @kern_hidden() +// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden() __attribute__((visibility("hidden"))) kernel void kern_hidden() {} // FVIS-DEFAULT: define protected amdgpu_kernel void @kern_protected() // FVIS-PROTECTED: define protected amdgpu_kernel void @kern_protected() // FVIS-HIDDEN: define protected amdgpu_kernel void @kern_protected() +// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected() +// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected() +// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected() __attribute__((visibility("protected"))) kernel void kern_protected() {} // FVIS-DEFAULT: define{{.*}} amdgpu_kernel void @kern_default() // FVIS-PROTECTED: define{{.*}} amdgpu_kernel void @kern_default() // FVIS-HIDDEN: define{{.*}} amdgpu_kernel void @kern_default() +// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default() +// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default() +// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default() __attribute__((visibility("default"))) kernel void kern_default() {} // FVIS-DEFAULT: define{{.*}} void @func() @@ -85,31 +96,42 @@ __attribute__((visibility("default"))) extern void ext_func_default(); void use() { glob = ext + ext_hidden + ext_protected + ext_default; ext_kern(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern() ext_kern_hidden(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden() ext_kern_protected(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected() ext_kern_default(); + // FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default() + // FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default() + // FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default() ext_func(); ext_func_hidden(); ext_func_protected(); ext_func_default(); } -// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern() -// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern() - -// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden() -// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden() +// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern() -// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected() -// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected() -// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected() +// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden() -// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default() -// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default() -// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default() +// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected() +// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default() +// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default() +// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default() // FVIS-DEFAULT: declare void @ext_func() // FVIS-PROTECTED: declare protected void @ext_func() @@ -126,3 +148,6 @@ void use() { // FVIS-DEFAULT: declare void @ext_func_default() // FVIS-PROTECTED: declare void @ext_func_default() // FVIS-HIDDEN: declare void @ext_func_default() + + + diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-of-this.clcpp b/clang/test/CodeGenOpenCLCXX/addrspace-of-this.clcpp index 2f1b6c196fd51..8395a40095b94 100644 --- a/clang/test/CodeGenOpenCLCXX/addrspace-of-this.clcpp +++ b/clang/test/CodeGenOpenCLCXX/addrspace-of-this.clcpp @@ -125,9 +125,6 @@ __kernel void test__global() { // EXPL: call spir_func void @_ZNU3AS41CC1EOU3AS4S_(ptr addrspace(4) {{[^,]*}} [[C5GEN]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[CALL]]) // IMPL: call void @llvm.memcpy.p0.p4.i32(ptr {{.*}}%c5, ptr addrspace(4) {{.*}}[[CALL]] -// Tests address space of inline members -//COMMON: @_ZNU3AS41C3getEv(ptr addrspace(4) {{[^,]*}} %this) -//COMMON: @_ZNU3AS41CplERU3AS4KS_(ptr dead_on_unwind noalias writable sret(%class.C) align 4 %agg.result, ptr addrspace(4) {{[^,]*}} %this #define TEST(AS) \ __kernel void test##AS() { \ AS C c; \ @@ -190,6 +187,10 @@ TEST(__private) // EXPL: call spir_func noundef align 4 dereferenceable(4) ptr addrspace(4) @_ZNU3AS41CaSERU3AS4KS_(ptr addrspace(4) {{[^,]*}} [[C2GEN]], ptr addrspace(4) noundef align 4 dereferenceable(4) [[C1GEN]]) // IMPL: call void @llvm.memcpy.p4.p4.i32(ptr addrspace(4) {{.*}}[[C2GEN]], ptr addrspace(4) {{.*}}[[C1GEN]] +// Tests address space of inline members +//COMMON: @_ZNU3AS41C3getEv(ptr addrspace(4) {{[^,]*}} %this) +//COMMON: @_ZNU3AS41CplERU3AS4KS_(ptr dead_on_unwind noalias writable sret(%class.C) align 4 %agg.result, ptr addrspace(4) {{[^,]*}} %this + // Test that calling a const method from a non-const method does not crash Clang. class ConstAndNonConstMethod { public: diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl index f15130d5f8b61..9331afc305c25 100644 --- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl +++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O1 -verify %s -o /dev/null // expected-remark@+10 {{Function Name: foo}} // expected-remark@+9 {{ TotalSGPRs: 13}}