Skip to content

[clang][SYCL] Do not decompose SYCL functors unless necessary #18258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels")
LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor")
LANGOPT(SYCLDecomposeStruct, 1, 0, "Force top level decomposition of SYCL functor")
LANGOPT(
SYCLValueFitInMaxInt, 1, 1,
"SYCL compiler assumes value fits within MAX_INT for member function of "
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -7106,11 +7106,11 @@ defm sycl_instrument_device_code
"(experimental)">>;
defm sycl_decompose_functor
: BoolFOption<"sycl-decompose-functor",
LangOpts<"SYCLDecomposeStruct">, DefaultTrue,
LangOpts<"SYCLDecomposeStruct">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CLOption], "Do">,
NegFlag<SetFalse, [], [ClangOption, CLOption], "Do not">,
BothFlags<[], [ClangOption, CLOption, CC1Option],
" decompose SYCL functor if possible (experimental, CUDA only)">>;
" decompose SYCL functor if possible (default is false)">>;
defm sycl_cuda_compat
: BoolFOption<"sycl-cuda-compatibility", LangOpts<"SYCLCUDACompat">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CLOption, CC1Option], "Enable CUDA compatibility mode (experimental). "
Expand Down
5 changes: 1 addition & 4 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,7 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc,

/// Returns true if the target requires a new type.
/// This happens if a pointer to generic cannot be passed
static bool targetRequiresNewType(ASTContext &Context) {
llvm::Triple T = Context.getTargetInfo().getTriple();
return !T.isNVPTX();
}
static bool targetRequiresNewType(ASTContext &Context) { return false; }

// This information is from Section 4.13 of the SYCL spec
// https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf
Expand Down
3 changes: 2 additions & 1 deletion clang/test/SemaSYCL/no-decomp.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
// RUN: %clang_cc1 -fsycl-is-device -triple spir64 -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP

Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
// REQUIRES: arch-intel_gpu_pvc
// REQUIRES-INTEL-DRIVER: lin: 30508
// RUN: %{build} -o %t.out
// Shouldn't have to use -fsycl-decompose-functor; see
// https://github.com/intel/llvm-test-suite/issues/18317
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out

// Basic test for new lsc_load_2d/lsc_store_2d API.
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------===//
// RUN: %{build} -fsycl-device-code-split=per_kernel -D__ESIMD_GATHER_SCATTER_LLVM_IR -o %t.out
// Shouldn't have to use -fsycl-decompose-functor; see
// https://github.com/intel/llvm-test-suite/issues/18317
// RUN: %{build} -fsycl-device-code-split=per_kernel -fsycl-decompose-functor -D__ESIMD_GATHER_SCATTER_LLVM_IR -o %t.out
// RUN: %{run} %t.out

// The test verifies esimd::scatter() functions accepting USM pointer
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// RUN: %{build} -o %t.out
// Shouldn't have to use -fsycl-decompose-functor; see
// https://github.com/intel/llvm-test-suite/issues/18317
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
//
// The test checks functionality of the gather_rgba/scatter_rgba USM-based ESIMD
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// RUN: %{build} -o %t.out
// Shouldn't have to use -fsycl-decompose-functor; see
// https://github.com/intel/llvm-test-suite/issues/18317
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
//
// The test checks functionality of the gather_rgba/scatter_rgba USM-based ESIMD
Expand Down
4 changes: 3 additions & 1 deletion sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
//
//===---------------------------------------------------------===//
// REQUIRES: gpu-intel-gen12
// RUN: %{build} -o %t.out
// Shouldn't have to use -fsycl-decompose-functor; see
// https://github.com/intel/llvm-test-suite/issues/18317
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out

#include "esimd_test_utils.hpp"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 env SYCL_UR_TRACE=2 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
5 changes: 4 additions & 1 deletion sycl/test-e2e/Graph/Update/update_nullptr.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
5 changes: 4 additions & 1 deletion sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
5 changes: 4 additions & 1 deletion sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %{build} -o %t.out
// By default, functors are no longer decomposed, preventing the use of set_arg
// in this test; -fsycl-decompose-functor is used to force the old behavior.
//
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// UNSUPPORTED: windows || target-amd
// RUN: %{build} -o %t.out
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s

// Test parameter analysis of USM usage
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// UNSUPPORTED: windows || hip
// RUN: %{build} -o %t.out
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s

// Test parameter analysis of USM usage
Expand Down
2 changes: 1 addition & 1 deletion sycl/test-e2e/XPTI/basic_event_collection_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// REQUIRES: xptifw, opencl, cpu, linux
// RUN: %build_collector
// RUN: %{build} -o %t.out
// RUN: %{build} -fsycl-decompose-functor -o %t.out
// RUN: env UR_ENABLE_LAYERS=UR_LAYER_TRACING env XPTI_TRACE_ENABLE=1 env XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher env XPTI_SUBSCRIBERS=%t_collector.dll %{run} %t.out | FileCheck %s

#include "basic_event_collection.inc"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,19 @@ void cache_control_read_write_func() {
}

// CHECK-IR: spir_kernel{{.*}}cache_control_read_hint_func
// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RHINT:.*]]
// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RHINT:.*]]
// CHECK-IR: ret void

// CHECK-IR: spir_kernel{{.*}}cache_control_read_assertion_func
// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RASSERT:.*]]
// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RASSERT:.*]]
// CHECK-IR: ret void

// CHECK-IR: spir_kernel{{.*}}cache_control_write_hint_func
// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[WHINT:.*]]
// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[WHINT:.*]]
// CHECK-IR: ret void

// CHECK-IR: spir_kernel{{.*}}cache_control_read_write_func
// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RWHINT:.*]]
// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RWHINT:.*]]
// CHECK-IR: ret void

// CHECK-IR: [[RHINT]] = !{[[RHINT1:.*]], [[RHINT2:.*]], [[RHINT3:.*]]}
Expand Down
Loading