diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index d8964749dc483..ac90175c73f65 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -259,6 +259,22 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal()); addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal()); addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal()); + } else if (auto Attr = F->getFnAttribute("sycl-max-work-group-size"); + Attr.isValid()) { + // Split values in the comma-separated list integers. + SmallVector ValStrs; + Attr.getValueAsString().split(ValStrs, ','); + assert(ValStrs.size() == 3 && "Must have all three dimensions for " + "sycl-max-work-group-size property"); + + static constexpr const char *Annots[] = {"maxntidx", "maxntidy", + "maxntidz"}; + for (auto [AnnotStr, ValStr] : zip(Annots, reverse(ValStrs))) { + int Value = 0; + bool Error = ValStr.getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, AnnotStr, Value); + } } auto attrValue = [&](Expr *E) { @@ -278,6 +294,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue())); HasMinWorkGroupPerCU = true; } + } else if (auto Attr = + F->getFnAttribute("sycl-min-work-groups-per-multiprocessor"); + Attr.isValid()) { + int Value = 0; + bool Error = Attr.getValueAsString().getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, "minctasm", Value); } if (const auto *MWGPMP = @@ -291,6 +314,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // The value is guaranteed to be > 0, pass it to the metadata. addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue())); } + } else if (auto Attr = + F->getFnAttribute("sycl-max-work-groups-per-cluster"); + Attr.isValid()) { + int Value = 0; + bool Error = Attr.getValueAsString().getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, "maxclusterrank", Value); } } diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 36adf1e52ff56..99721393f17f3 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -361,18 +361,29 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64); } - if (AttrKindStr == "sycl-work-group-size" || - AttrKindStr == "sycl-work-group-size-hint") { + static constexpr std::tuple + SimpleWGAttrs[] = { + {"sycl-work-group-size", "reqd_work_group_size", + /*RequiresAll3Dims*/ false}, + {"sycl-work-group-size-hint", "work_group_size_hint", + /*RequiresAll3Dims*/ false}, + {"sycl-max-work-group-size", "max_work_group_size", + /*RequiresAll3Dims*/ true}, + }; + + for (auto &[AttrKind, MDStr, Req3D] : SimpleWGAttrs) { + if (AttrKindStr != AttrKind) + continue; // Split values in the comma-separated list integers. - SmallVector ValStrs; - Attr.getValueAsString().split(ValStrs, ','); + SmallVector AttrValStrs; + Attr.getValueAsString().split(AttrValStrs, ','); - assert(ValStrs.size() <= 3 && - "sycl-work-group-size and sycl-work-group-size-hint currently only " - "support up to three values"); + assert(((Req3D && AttrValStrs.size() == 3) || + (!Req3D && AttrValStrs.size() <= 3)) && + "Incorrect number of values for kernel property"); // SYCL work-group sizes must be reversed for SPIR-V. - std::reverse(ValStrs.begin(), ValStrs.end()); + std::reverse(AttrValStrs.begin(), AttrValStrs.end()); // Use integer pointer size as closest analogue to size_t. IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx); @@ -381,24 +392,29 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { // Get the integers from the strings. SmallVector MDVals; - for (StringRef ValStr : ValStrs) + for (StringRef ValStr : AttrValStrs) MDVals.push_back(ConstantAsMetadata::get( Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10)))); - const char *MDName = (AttrKindStr == "sycl-work-group-size") - ? "reqd_work_group_size" - : "work_group_size_hint"; - return std::pair(MDName, MDNode::get(Ctx, MDVals)); + return std::pair(MDStr, MDNode::get(Ctx, MDVals)); } - if (AttrKindStr == "sycl-sub-group-size") { - uint32_t SubGroupSize = getAttributeAsInteger(Attr); - IntegerType *Ty = Type::getInt32Ty(Ctx); - Metadata *MDVal = ConstantAsMetadata::get( - Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); - SmallVector MD{MDVal}; - return std::pair("intel_reqd_sub_group_size", - MDNode::get(Ctx, MD)); + static constexpr std::pair SimpleI32Attrs[] = { + {"sycl-sub-group-size", "intel_reqd_sub_group_size"}, + {"sycl-min-work-groups-per-multiprocessor", + "min_work_groups_per_multiprocessor"}, + {"sycl-max-work-groups-per-cluster", "max_work_groups_per_cluster"}, + }; + + for (auto [AttrKind, MDStr] : SimpleI32Attrs) { + if (AttrKindStr == AttrKind) { + uint32_t SubGroupSize = getAttributeAsInteger(Attr); + IntegerType *Ty = Type::getInt32Ty(Ctx); + Metadata *MDVal = ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); + SmallVector MD{MDVal}; + return std::pair(MDStr, MDNode::get(Ctx, MD)); + } } // The sycl-single-task attribute currently only has an effect when targeting diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index e46ab88c43172..7fea0970ce26e 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -71,6 +71,31 @@ struct single_task_kernel_key { using value_t = property_value; }; +struct max_work_group_size_key + : detail::compile_time_property_key { + template + using value_t = property_value, + std::integral_constant, + std::integral_constant>; +}; + +struct min_work_groups_per_multiprocessor_key + : detail::compile_time_property_key< + detail::PropKind::MinWorkGroupsPerMultiprocessor> { + template + using value_t = property_value>; +}; + +struct max_work_groups_per_cluster_key + : detail::compile_time_property_key< + detail::PropKind::MaxWorkGroupsPerCluster> { + template + using value_t = property_value>; +}; + template struct property_value, std::integral_constant...> { @@ -138,6 +163,44 @@ template <> struct property_value { using key_t = single_task_kernel_key; }; +template +struct property_value, + std::integral_constant, + std::integral_constant> { + static_assert( + detail::AllNonZero::value, + "max_work_group_size property must only contain non-zero values."); + + using key_t = max_work_group_size_key; + + constexpr size_t operator[](int Dim) const { + return std::array{Dim0, Dim1, Dim2}[Dim]; + } +}; + +template +struct property_value> { + static_assert(Size != 0, "min_work_groups_per_multiprocessor_key property " + "must contain a non-zero value."); + + using key_t = min_work_groups_per_multiprocessor_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + +template +struct property_value> { + static_assert(Size != 0, "max_work_groups_per_cluster_key property must " + "contain a non-zero value."); + + using key_t = max_work_groups_per_cluster_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + template inline constexpr work_group_size_key::value_t work_group_size; @@ -156,6 +219,18 @@ inline constexpr nd_range_kernel_key::value_t nd_range_kernel; inline constexpr single_task_kernel_key::value_t single_task_kernel; +template +inline constexpr max_work_group_size_key::value_t + max_work_group_size; + +template +inline constexpr min_work_groups_per_multiprocessor_key::value_t + min_work_groups_per_multiprocessor; + +template +inline constexpr max_work_groups_per_cluster_key::value_t + max_work_groups_per_cluster; + struct work_group_progress_key : detail::compile_time_property_key { template struct PropertyMetaInfo { static constexpr const char *name = "sycl-single-task-kernel"; static constexpr int value = 0; }; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-group-size"; + static constexpr const char *value = SizeListToStr::value; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-min-work-groups-per-multiprocessor"; + static constexpr uint32_t value = Size; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-groups-per-cluster"; + static constexpr uint32_t value = Size; +}; template struct HasKernelPropertiesGetMethod : std::false_type {}; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index e225928c4cd68..4b15d6b45339c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -212,8 +212,11 @@ enum PropKind : uint32_t { IncludeFiles = 71, RegisteredKernelNames = 72, ClusterLaunch = 73, + MaxWorkGroupSize = 74, + MinWorkGroupsPerMultiprocessor = 75, + MaxWorkGroupsPerCluster = 76, // PropKindSize must always be the last value. - PropKindSize = 74, + PropKindSize = 77, }; struct property_key_base_tag {}; diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp new file mode 100644 index 0000000000000..76ba1fcdd843f --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>, + }; + // CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + // CHECK-IR-SAME: !max_work_groups_per_cluster ![[MaxWGsPerCMD:[0-9]+]] + // CHECK-IR-SAME: !min_work_groups_per_multiprocessor ![[MinWGsPerMPMD:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8" + +// CHECK-IR: ![[MaxWGsPerCMD]] = !{i32 4} +// CHECK-IR: ![[MinWGsPerMPMD]] = !{i32 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp new file mode 100644 index 0000000000000..4a137afd0d56a --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp @@ -0,0 +1,27 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>, + }; + + // CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8" + +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1} +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"minctasm", i32 8} +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxclusterrank", i32 4} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp new file mode 100644 index 0000000000000..218747f85af82 --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp @@ -0,0 +1,29 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel0(){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1:[0-9]+]] + Q.single_task(Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel1(){{.*}} #[[MaxWGSizeAttr1]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]] + Q.single_task(Ev, Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel2(){{.*}} #[[MaxWGSizeAttr1]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]] + Q.single_task({Ev}, Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4,2" + +// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 2, i64 4, i64 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp new file mode 100644 index 0000000000000..e2d4b66cd8f1d --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp @@ -0,0 +1,25 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn:.*MaxWGSizeKernel0]](){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { +// CHECK-IR-SAME: "sycl-max-work-group-size"="8,4,2" + +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"kernel", i32 1} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidx", i32 2} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidy", i32 4} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidz", i32 8} diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 3868c23f7535c..a1389090c42e6 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -50,6 +50,12 @@ int main() { static_assert( is_property_value)>::value); static_assert(is_property_value)>::value); + static_assert( + is_property_value)>::value); + static_assert(is_property_value< + decltype(min_work_groups_per_multiprocessor<8>)>::value); + static_assert( + is_property_value)>::value); static_assert( std::is_same_v)::key_t>); @@ -66,6 +72,15 @@ int main() { decltype(work_group_size_hint<13, 13, 13>)::key_t>); static_assert( std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); static_assert(work_group_size<15>[0] == 15); static_assert(work_group_size<16, 17>[0] == 16); @@ -80,6 +95,11 @@ int main() { static_assert(work_group_size_hint<24, 25, 26>[1] == 25); static_assert(work_group_size_hint<24, 25, 26>[2] == 26); static_assert(sub_group_size<27>.value == 27); + static_assert(max_work_group_size<28, 29, 30>[0] == 28); + static_assert(max_work_group_size<28, 29, 30>[1] == 29); + static_assert(max_work_group_size<28, 29, 30>[2] == 30); + static_assert(min_work_groups_per_multiprocessor<28>.value == 28); + static_assert(max_work_groups_per_cluster<29>.value == 29); static_assert(std::is_same_v)::value_t, std::integral_constant>);