Skip to content

Enable prefetch iteration #382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: sycl-develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions include/cute/arch/xe_copy_1B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ struct XE_2D_U8x1x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -309,6 +310,7 @@ struct XE_2D_U8x2x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -343,6 +345,7 @@ struct XE_2D_U8x4x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -377,6 +380,7 @@ struct XE_2D_U8x8x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -410,6 +414,7 @@ struct XE_2D_U8x16x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -602,6 +607,7 @@ struct XE_2D_U8x1x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -634,6 +640,7 @@ struct XE_2D_U8x2x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -666,6 +673,7 @@ struct XE_2D_U8x4x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -698,6 +706,7 @@ struct XE_2D_U8x8x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -730,6 +739,7 @@ struct XE_2D_U8x16x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -763,6 +773,7 @@ struct XE_2D_U8x32x64_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -798,6 +809,7 @@ struct XE_2D_U8x32x16_LD_V {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down
69 changes: 9 additions & 60 deletions include/cute/arch/xe_copy_2B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ struct XE_2D_U16x8x16_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -360,6 +361,7 @@ struct XE_2D_U16x16x16_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -393,6 +395,7 @@ struct XE_2D_U16x32x16_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -426,6 +429,7 @@ struct XE_2D_U16x1x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -458,6 +462,7 @@ struct XE_2D_U16x2x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -490,6 +495,7 @@ struct XE_2D_U16x4x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -522,6 +528,7 @@ struct XE_2D_U16x8x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -555,6 +562,7 @@ struct XE_2D_U16x16x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -588,6 +596,7 @@ struct XE_2D_U16x32x32_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -620,21 +629,6 @@ struct XE_2D_U16x16x16_LD_V {
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-Xe hardware");
#endif
}

// Prefetch operation nested in XE_2D_U16x16x16_LD_V. Exposes a static copy()
// that issues a subgroup block-read prefetch rather than a load; it returns
// nothing and takes no destination argument.
struct PREFETCH {
// baseoffset: base address of the surface, passed to the builtin as intptr_t.
// width / height / pitch: each is forwarded decremented by one
//   (presumably the builtin expects "size minus one" encoding; units are not
//   visible here — TODO confirm against the builtin's documentation).
// coord: block coordinate forwarded unchanged to the builtin.
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
#if defined(SYCL_INTEL_TARGET)
// u16 element prefetch using the m16k16v1 builtin variant; the cache policy
// is fixed to CacheControl::kL1C_L3C.
__builtin_IB_subgroup_block_read_prefetch_u16_m16k16v1(
(intptr_t)baseoffset, width - 1, height - 1, pitch - 1, coord,
CacheControl::kL1C_L3C);
#else
// Built without SYCL_INTEL_TARGET: reaching this path is reported as invalid.
CUTE_INVALID_CONTROL_PATH(
"Trying to use block prefetch on non-Xe hardware");
#endif
}
};
};

struct XE_2D_U16x32x16_LD_V {
Expand All @@ -653,21 +647,6 @@ struct XE_2D_U16x32x16_LD_V {
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-Xe hardware");
#endif
}

// Prefetch operation nested in XE_2D_U16x32x16_LD_V. Exposes a static copy()
// that issues a subgroup block-read prefetch rather than a load; it returns
// nothing and takes no destination argument.
struct PREFETCH {
// baseoffset: base address of the surface, passed to the builtin as intptr_t.
// width / height / pitch: each is forwarded decremented by one
//   (presumably the builtin expects "size minus one" encoding; units are not
//   visible here — TODO confirm against the builtin's documentation).
// coord: block coordinate forwarded unchanged to the builtin.
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
#if defined(SYCL_INTEL_TARGET)
// u16 element prefetch using the m32k16v1 builtin variant; the cache policy
// is fixed to CacheControl::kL1C_L3C.
__builtin_IB_subgroup_block_read_prefetch_u16_m32k16v1(
(intptr_t)baseoffset, width - 1, height - 1, pitch - 1, coord,
CacheControl::kL1C_L3C);
#else
// Built without SYCL_INTEL_TARGET: reaching this path is reported as invalid.
CUTE_INVALID_CONTROL_PATH(
"Trying to use block prefetch on non-Xe hardware");
#endif
}
};
};

struct XE_2D_U16x16x32_LD_V {
Expand All @@ -686,21 +665,6 @@ struct XE_2D_U16x16x32_LD_V {
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-Xe hardware");
#endif
}

// Prefetch operation nested in XE_2D_U16x16x32_LD_V. Exposes a static copy()
// that issues a subgroup block-read prefetch rather than a load; it returns
// nothing and takes no destination argument.
struct PREFETCH {
// baseoffset: base address of the surface, passed to the builtin as intptr_t.
// width / height / pitch: each is forwarded decremented by one
//   (presumably the builtin expects "size minus one" encoding; units are not
//   visible here — TODO confirm against the builtin's documentation).
// coord: block coordinate forwarded unchanged to the builtin.
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
#if defined(SYCL_INTEL_TARGET)
// u16 element prefetch using the m16k16v2 builtin variant; the cache policy
// is fixed to CacheControl::kL1C_L3C.
__builtin_IB_subgroup_block_read_prefetch_u16_m16k16v2(
(intptr_t)baseoffset, width - 1, height - 1, pitch - 1, coord,
CacheControl::kL1C_L3C);
#else
// Built without SYCL_INTEL_TARGET: reaching this path is reported as invalid.
CUTE_INVALID_CONTROL_PATH(
"Trying to use block prefetch on non-Xe hardware");
#endif
}
};
};

struct XE_2D_U16x32x32_LD_V {
Expand All @@ -719,21 +683,6 @@ struct XE_2D_U16x32x32_LD_V {
CUTE_INVALID_CONTROL_PATH("Trying to use block loads on non-Xe hardware");
#endif
}

// Prefetch operation nested in XE_2D_U16x32x32_LD_V. Exposes a static copy()
// that issues a subgroup block-read prefetch rather than a load; it returns
// nothing and takes no destination argument.
struct PREFETCH {
// baseoffset: base address of the surface, passed to the builtin as intptr_t.
// width / height / pitch: each is forwarded decremented by one
//   (presumably the builtin expects "size minus one" encoding; units are not
//   visible here — TODO confirm against the builtin's documentation).
// coord: block coordinate forwarded unchanged to the builtin.
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
#if defined(SYCL_INTEL_TARGET)
// NOTE(review): the enclosing op is named 32x32, yet this calls the m16k16v2
// builtin — the same variant used by the XE_2D_U16x16x32_LD_V prefetch.
// Looks like only half of the rows would be prefetched; confirm whether an
// m32k16v2 variant was intended.
__builtin_IB_subgroup_block_read_prefetch_u16_m16k16v2(
(intptr_t)baseoffset, width - 1, height - 1, pitch - 1, coord,
CacheControl::kL1C_L3C);
#else
// Built without SYCL_INTEL_TARGET: reaching this path is reported as invalid.
CUTE_INVALID_CONTROL_PATH(
"Trying to use block prefetch on non-Xe hardware");
#endif
}
};
};

struct XE_2D_U16x16x8_LD_T {
Expand Down
2 changes: 2 additions & 0 deletions include/cute/arch/xe_copy_4B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ struct XE_2D_TF32x16x16_LD_N {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down Expand Up @@ -696,6 +697,7 @@ struct XE_2D_U32x16x8_LD_T {
}

struct PREFETCH {
using BlockShape = BlockShape;
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch,
intel::coord_t coord) {
Expand Down
5 changes: 4 additions & 1 deletion include/cute/atom/copy_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,11 @@ namespace detail {
// Compile-time trait: is CopyOp a prefetch operation?
// Primary template — evaluates to false for any CopyOp; the defaulted second
// parameter exists so that partial specializations can select on it via void_t.
template <class CopyOp, class = void>
constexpr bool is_prefetch = false;

//template <class CopyOp>
//constexpr bool is_prefetch<CopyOp, void_t<typename CopyOp::PREFETCH>> = is_same_v<CopyOp, typename CopyOp::PREFETCH>;

template <class CopyOp>
constexpr bool is_prefetch<CopyOp, void_t<typename CopyOp::PREFETCH>> = is_same_v<CopyOp, typename CopyOp::PREFETCH>;
constexpr bool is_prefetch<CopyOp, void_t<decltype(CopyOp{}.copy(nullptr, 0,0,0, {0,0}))>> = true;
Comment on lines +156 to +160
Copy link
Collaborator

@aacostadiaz aacostadiaz May 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//template <class CopyOp>
//constexpr bool is_prefetch<CopyOp, void_t<typename CopyOp::PREFETCH>> = is_same_v<CopyOp, typename CopyOp::PREFETCH>;
template <class CopyOp>
constexpr bool is_prefetch<CopyOp, void_t<typename CopyOp::PREFETCH>> = is_same_v<CopyOp, typename CopyOp::PREFETCH>;
constexpr bool is_prefetch<CopyOp, void_t<decltype(CopyOp{}.copy(nullptr, 0,0,0, {0,0}))>> = true;
#if defined(SYCL_INTEL_TARGET)
template <class CopyOp>
constexpr bool is_prefetch<CopyOp, void_t<decltype(CopyOp{}.copy(nullptr, 0,0,0, {0,0}))>> = true;
#else
// TODO(Codeplay): Enable for SYCL_INTEL_TARGET.
template <class CopyOp>
constexpr bool is_prefetch<CopyOp, void_t<typename CopyOp::PREFETCH>> = is_same_v<CopyOp, typename CopyOp::PREFETCH>;
#endif


} // end namespace detail

Expand Down
Loading
Loading