Skip to content

[SYCL] Add kernel properties for three function attributes #14448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
30 changes: 30 additions & 0 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,22 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal());
addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal());
addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal());
} else if (auto Attr = F->getFnAttribute("sycl-max-work-group-size");
Attr.isValid()) {
// Split values in the comma-separated list integers.
SmallVector<StringRef, 3> ValStrs;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a lot of code overlap between NVPTX lowering of kernel properties and CompileTimePropertiesPass lowering of kernel properties. We might want to think of a shared utility to work with kernel properties.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hiding the fact that they're all represented as strings in LLVM IR as an "implementation detail" and providing a std::optional<T> KernelPropertyAttr::getAsInteger or getAsIntegerList might be a good step.

Attr.getValueAsString().split(ValStrs, ',');
assert(ValStrs.size() == 3 && "Must have all three dimensions for "
"sycl-max-work-group-size property");

static constexpr const char *Annots[] = {"maxntidx", "maxntidy",
"maxntidz"};
for (auto [AnnotStr, ValStr] : zip(Annots, reverse(ValStrs))) {
int Value = 0;
bool Error = ValStr.getAsInteger(10, Value);
assert(!Error && "The attribute's value is not a number");
addNVVMMetadata(F, AnnotStr, Value);
}
}

auto attrValue = [&](Expr *E) {
Expand All @@ -278,6 +294,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue()));
HasMinWorkGroupPerCU = true;
}
} else if (auto Attr =
F->getFnAttribute("sycl-min-work-groups-per-multiprocessor");
Attr.isValid()) {
int Value = 0;
bool Error = Attr.getValueAsString().getAsInteger(10, Value);
assert(!Error && "The attribute's value is not a number");
addNVVMMetadata(F, "minctasm", Value);
}

if (const auto *MWGPMP =
Expand All @@ -291,6 +314,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// The value is guaranteed to be > 0, pass it to the metadata.
addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue()));
}
} else if (auto Attr =
F->getFnAttribute("sycl-max-work-groups-per-cluster");
Attr.isValid()) {
int Value = 0;
bool Error = Attr.getValueAsString().getAsInteger(10, Value);
assert(!Error && "The attribute's value is not a number");
addNVVMMetadata(F, "maxclusterrank", Value);
}
}

Expand Down
58 changes: 37 additions & 21 deletions llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,18 +361,29 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {
AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64);
}

if (AttrKindStr == "sycl-work-group-size" ||
AttrKindStr == "sycl-work-group-size-hint") {
static constexpr std::tuple<const char *, const char *, bool>
SimpleWGAttrs[] = {
{"sycl-work-group-size", "reqd_work_group_size",
/*RequiresAll3Dims*/ false},
{"sycl-work-group-size-hint", "work_group_size_hint",
/*RequiresAll3Dims*/ false},
{"sycl-max-work-group-size", "max_work_group_size",
/*RequiresAll3Dims*/ true},
};

for (auto &[AttrKind, MDStr, Req3D] : SimpleWGAttrs) {
if (AttrKindStr != AttrKind)
continue;
// Split values in the comma-separated list integers.
SmallVector<StringRef, 3> ValStrs;
Attr.getValueAsString().split(ValStrs, ',');
SmallVector<StringRef, 3> AttrValStrs;
Attr.getValueAsString().split(AttrValStrs, ',');

assert(ValStrs.size() <= 3 &&
"sycl-work-group-size and sycl-work-group-size-hint currently only "
"support up to three values");
assert(((Req3D && AttrValStrs.size() == 3) ||
(!Req3D && AttrValStrs.size() <= 3)) &&
"Incorrect number of values for kernel property");

// SYCL work-group sizes must be reversed for SPIR-V.
std::reverse(ValStrs.begin(), ValStrs.end());
std::reverse(AttrValStrs.begin(), AttrValStrs.end());

// Use integer pointer size as closest analogue to size_t.
IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx);
Expand All @@ -381,24 +392,29 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) {

// Get the integers from the strings.
SmallVector<Metadata *, 3> MDVals;
for (StringRef ValStr : ValStrs)
for (StringRef ValStr : AttrValStrs)
MDVals.push_back(ConstantAsMetadata::get(
Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10))));

const char *MDName = (AttrKindStr == "sycl-work-group-size")
? "reqd_work_group_size"
: "work_group_size_hint";
return std::pair<std::string, MDNode *>(MDName, MDNode::get(Ctx, MDVals));
return std::pair<std::string, MDNode *>(MDStr, MDNode::get(Ctx, MDVals));
}

if (AttrKindStr == "sycl-sub-group-size") {
uint32_t SubGroupSize = getAttributeAsInteger<uint32_t>(Attr);
IntegerType *Ty = Type::getInt32Ty(Ctx);
Metadata *MDVal = ConstantAsMetadata::get(
Constant::getIntegerValue(Ty, APInt(32, SubGroupSize)));
SmallVector<Metadata *, 1> MD{MDVal};
return std::pair<std::string, MDNode *>("intel_reqd_sub_group_size",
MDNode::get(Ctx, MD));
static constexpr std::pair<const char *, const char *> SimpleI32Attrs[] = {
{"sycl-sub-group-size", "intel_reqd_sub_group_size"},
{"sycl-min-work-groups-per-multiprocessor",
"min_work_groups_per_multiprocessor"},
{"sycl-max-work-groups-per-cluster", "max_work_groups_per_cluster"},
};

for (auto [AttrKind, MDStr] : SimpleI32Attrs) {
if (AttrKindStr == AttrKind) {
uint32_t SubGroupSize = getAttributeAsInteger<uint32_t>(Attr);
IntegerType *Ty = Type::getInt32Ty(Ctx);
Metadata *MDVal = ConstantAsMetadata::get(
Constant::getIntegerValue(Ty, APInt(32, SubGroupSize)));
SmallVector<Metadata *, 1> MD{MDVal};
return std::pair<std::string, MDNode *>(MDStr, MDNode::get(Ctx, MD));
}
}

// The sycl-single-task attribute currently only has an effect when targeting
Expand Down
90 changes: 90 additions & 0 deletions sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,31 @@ struct single_task_kernel_key {
using value_t = property_value<single_task_kernel_key>;
};

struct max_work_group_size_key
: detail::compile_time_property_key<detail::PropKind::MaxWorkGroupSize> {
template <size_t Dim0, size_t Dim1, size_t Dim2>
using value_t = property_value<max_work_group_size_key,
std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dim1>,
std::integral_constant<size_t, Dim2>>;
};

struct min_work_groups_per_multiprocessor_key
: detail::compile_time_property_key<
detail::PropKind::MinWorkGroupsPerMultiprocessor> {
template <uint32_t Size>
using value_t = property_value<min_work_groups_per_multiprocessor_key,
std::integral_constant<uint32_t, Size>>;
};

struct max_work_groups_per_cluster_key
: detail::compile_time_property_key<
detail::PropKind::MaxWorkGroupsPerCluster> {
template <uint32_t Size>
using value_t = property_value<max_work_groups_per_cluster_key,
std::integral_constant<uint32_t, Size>>;
};

template <size_t Dim0, size_t... Dims>
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dims>...> {
Expand Down Expand Up @@ -138,6 +163,44 @@ template <> struct property_value<single_task_kernel_key> {
using key_t = single_task_kernel_key;
};

template <size_t Dim0, size_t Dim1, size_t Dim2>
struct property_value<max_work_group_size_key,
std::integral_constant<size_t, Dim0>,
std::integral_constant<size_t, Dim1>,
std::integral_constant<size_t, Dim2>> {
static_assert(
detail::AllNonZero<Dim0, Dim1, Dim2>::value,
"max_work_group_size property must only contain non-zero values.");

using key_t = max_work_group_size_key;

constexpr size_t operator[](int Dim) const {
return std::array<size_t, 3>{Dim0, Dim1, Dim2}[Dim];
}
};

template <uint32_t Size>
struct property_value<min_work_groups_per_multiprocessor_key,
std::integral_constant<uint32_t, Size>> {
static_assert(Size != 0, "min_work_groups_per_multiprocessor_key property "
"must contain a non-zero value.");

using key_t = min_work_groups_per_multiprocessor_key;
using value_t = std::integral_constant<uint32_t, Size>;
static constexpr uint32_t value = Size;
};

template <uint32_t Size>
struct property_value<max_work_groups_per_cluster_key,
std::integral_constant<uint32_t, Size>> {
static_assert(Size != 0, "max_work_groups_per_cluster_key property must "
"contain a non-zero value.");

using key_t = max_work_groups_per_cluster_key;
using value_t = std::integral_constant<uint32_t, Size>;
static constexpr uint32_t value = Size;
};

template <size_t Dim0, size_t... Dims>
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;

Expand All @@ -156,6 +219,18 @@ inline constexpr nd_range_kernel_key::value_t<Dims> nd_range_kernel;

inline constexpr single_task_kernel_key::value_t single_task_kernel;

template <size_t Dim0, size_t Dim1, size_t Dim2>
inline constexpr max_work_group_size_key::value_t<Dim0, Dim1, Dim2>
max_work_group_size;

template <uint32_t Size>
inline constexpr min_work_groups_per_multiprocessor_key::value_t<Size>
min_work_groups_per_multiprocessor;

template <uint32_t Size>
inline constexpr max_work_groups_per_cluster_key::value_t<Size>
max_work_groups_per_cluster;

struct work_group_progress_key
: detail::compile_time_property_key<detail::PropKind::WorkGroupProgress> {
template <forward_progress_guarantee Guarantee,
Expand Down Expand Up @@ -270,6 +345,21 @@ template <> struct PropertyMetaInfo<single_task_kernel_key::value_t> {
static constexpr const char *name = "sycl-single-task-kernel";
static constexpr int value = 0;
};
template <size_t Dim0, size_t Dim1, size_t Dim2>
struct PropertyMetaInfo<max_work_group_size_key::value_t<Dim0, Dim1, Dim2>> {
static constexpr const char *name = "sycl-max-work-group-size";
static constexpr const char *value = SizeListToStr<Dim0, Dim1, Dim2>::value;
};
template <uint32_t Size>
struct PropertyMetaInfo<min_work_groups_per_multiprocessor_key::value_t<Size>> {
static constexpr const char *name = "sycl-min-work-groups-per-multiprocessor";
static constexpr uint32_t value = Size;
};
template <uint32_t Size>
struct PropertyMetaInfo<max_work_groups_per_cluster_key::value_t<Size>> {
static constexpr const char *name = "sycl-max-work-groups-per-cluster";
static constexpr uint32_t value = Size;
};

template <typename T, typename = void>
struct HasKernelPropertiesGetMethod : std::false_type {};
Expand Down
5 changes: 4 additions & 1 deletion sycl/include/sycl/ext/oneapi/properties/property.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,11 @@ enum PropKind : uint32_t {
IncludeFiles = 71,
RegisteredKernelNames = 72,
ClusterLaunch = 73,
MaxWorkGroupSize = 74,
MinWorkGroupsPerMultiprocessor = 75,
MaxWorkGroupsPerCluster = 76,
// PropKindSize must always be the last value.
PropKindSize = 74,
PropKindSize = 77,
};

struct property_key_base_tag {};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>,
sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>,
};
// CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]]
// CHECK-IR-SAME: !max_work_groups_per_cluster ![[MaxWGsPerCMD:[0-9]+]]
// CHECK-IR-SAME: !min_work_groups_per_multiprocessor ![[MinWGsPerMPMD:[0-9]+]]
Q.single_task<class LaunchBoundsKernel>(Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = {
// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4"
// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8"

// CHECK-IR: ![[MaxWGsPerCMD]] = !{i32 4}
// CHECK-IR: ![[MinWGsPerMPMD]] = !{i32 8}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>,
sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>,
};

// CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]]
Q.single_task<class LaunchBoundsKernel0>(Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = {
// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4"
// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8"

// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1}
// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"minctasm", i32 8}
// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxclusterrank", i32 4}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR
// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s
// expected-no-diagnostics

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;
sycl::event Ev;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>};

// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel0(){{.*}} #[[MaxWGSizeAttr1:[0-9]+]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1:[0-9]+]]
Q.single_task<class MaxWGSizeKernel0>(Props, []() {});
// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel1(){{.*}} #[[MaxWGSizeAttr1]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]]
Q.single_task<class MaxWGSizeKernel1>(Ev, Props, []() {});
// CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel2(){{.*}} #[[MaxWGSizeAttr1]]
// CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]]
Q.single_task<class MaxWGSizeKernel2>({Ev}, Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4,2"

// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 2, i64 4, i64 8}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR

#include <sycl/sycl.hpp>

int main() {
sycl::queue Q;

constexpr auto Props = sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>};

// CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn:.*MaxWGSizeKernel0]](){{.*}} #[[MaxWGSizeAttr1:[0-9]+]]
Q.single_task<class MaxWGSizeKernel0>(Props, []() {});

return 0;
}

// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = {
// CHECK-IR-SAME: "sycl-max-work-group-size"="8,4,2"

// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"kernel", i32 1}
// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidx", i32 2}
// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidy", i32 4}
// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidz", i32 8}
Loading
Loading