From a7280e54e9497f71ecd88d8ab4dd911c02da6220 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Wed, 30 Apr 2025 13:07:59 +0100 Subject: [PATCH 1/6] [clang][SYCL] Do not decompose SYCL functors unless necessary The top level of SYCL functors are decomposed by default regardless of the content. The patch forces SYCL functors to only be decompose if there is a special type inside. --- clang/include/clang/Basic/LangOptions.def | 2 +- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Sema/SemaSYCL.cpp | 3 +-- clang/test/SemaSYCL/no-decomp.cpp | 3 ++- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 99e39359ed21e..bfa31df24a189 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -312,7 +312,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension") LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels") -LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor") +LANGOPT(SYCLDecomposeStruct, 1, 0, "Force top level decomposition of SYCL functor") LANGOPT( SYCLValueFitInMaxInt, 1, 1, "SYCL compiler assumes value fits within MAX_INT for member function of " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d7e60e601293b..21055e8f4482c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7089,11 +7089,11 @@ defm sycl_instrument_device_code "(experimental)">>; defm sycl_decompose_functor : BoolFOption<"sycl-decompose-functor", - LangOpts<"SYCLDecomposeStruct">, DefaultTrue, + LangOpts<"SYCLDecomposeStruct">, DefaultFalse, PosFlag, NegFlag, BothFlags<[], [ClangOption, CLOption, CC1Option], - " decompose SYCL functor if possible (experimental, CUDA only)">>; + " decompose SYCL functor if possible (default is false)">>; defm sycl_cuda_compat : BoolFOption<"sycl-cuda-compatibility", LangOpts<"SYCLCUDACompat">, DefaultFalse, PosFlag Date: Fri, 2 May 2025 09:21:01 +0100 Subject: [PATCH 2/6] Update tests --- sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp | 5 ++++- .../Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp | 5 ++++- sycl/test-e2e/Graph/Update/update_nullptr.cpp | 5 ++++- ...ate_with_indices_ptr_multiple_nodes_different_indices.cpp | 5 ++++- .../Graph/Update/update_with_indices_ptr_multiple_params.cpp | 5 ++++- sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp | 5 ++++- sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp | 5 ++++- sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp | 2 +- .../Tracing/usm/queue_single_task_released_pointer.cpp | 2 +- 9 files changed, 30 insertions(+), 9 deletions(-) diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp index 5aa691b9c36ae..0675576e4e087 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 env SYCL_UR_TRACE=2 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp index 65d27070a1b0c..663eb9413c9f3 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_nullptr.cpp b/sycl/test-e2e/Graph/Update/update_nullptr.cpp index 060386c6659a3..787ae75ca048b 100644 --- a/sycl/test-e2e/Graph/Update/update_nullptr.cpp +++ b/sycl/test-e2e/Graph/Update/update_nullptr.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp index 0b5d97dffcccb..1cc2b45a13ed6 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp index 212074b5450f3..e869190937382 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp index 3c4bb8f189e7a..f19df4cda68cd 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index b894685a8bd87..97209ce10b410 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp index ef8b98b98301f..34682a61cf62b 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: windows || target-amd -// RUN: %{build} -o %t.out +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp index 4444ee1b7b903..c40d2bf557cd4 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: windows || hip -// RUN: %{build} -o %t.out +// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage From 39a119f98a51b55bebd64469e21eff015db40573 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Fri, 2 May 2025 21:29:58 +0100 Subject: [PATCH 3/6] fixes --- clang/lib/Sema/SemaSYCL.cpp | 4 +--- sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp | 4 ++-- .../Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp | 4 ++-- sycl/test-e2e/Graph/Update/update_nullptr.cpp | 4 ++-- ...date_with_indices_ptr_multiple_nodes_different_indices.cpp | 4 ++-- .../Graph/Update/update_with_indices_ptr_multiple_params.cpp | 4 ++-- sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp | 4 ++-- sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp | 4 ++-- sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp | 2 +- .../Tracing/usm/queue_single_task_released_pointer.cpp | 2 +- 10 files changed, 17 insertions(+), 19 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 8d85a96bba113..83d7ae630f4ae 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -319,9 +319,7 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, /// Returns true if the target requires a new type. /// This happens if a pointer to generic cannot be passed -static bool targetRequiresNewType(ASTContext &Context) { - return false; -} +static bool targetRequiresNewType(ASTContext &Context) { return false; } // This information is from Section 4.13 of the SYCL spec // https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp index 0675576e4e087..31bc42eea6742 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_different_arg_nums.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: env SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 env SYCL_UR_TRACE=2 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp index 663eb9413c9f3..8953c37bd917f 100644 --- a/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_different_type_dyn_params.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_nullptr.cpp b/sycl/test-e2e/Graph/Update/update_nullptr.cpp index 787ae75ca048b..9e155a7b546ea 100644 --- a/sycl/test-e2e/Graph/Update/update_nullptr.cpp +++ b/sycl/test-e2e/Graph/Update/update_nullptr.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp index 1cc2b45a13ed6..aa5b715cd4b2c 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_nodes_different_indices.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp index e869190937382..9723a03bab2a8 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_ptr_multiple_params.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp index f19df4cda68cd..41441d5408e6a 100644 --- a/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp +++ b/sycl/test-e2e/Graph/Update/update_with_indices_scalar.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp index 97209ce10b410..c6dcc2caf82a8 100644 --- a/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp +++ b/sycl/test-e2e/Graph/Update/whole_update_dynamic_param.cpp @@ -1,7 +1,7 @@ // By default functors are no longer decomposed preventing the use of set_arg in -// this test, -fsycl-sycl-decompose-functor is used to force the old behavior +// this test, -fsycl-decompose-functor is used to force the old behavior // -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp index 34682a61cf62b..fadfc0c035d69 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_nullptr.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: windows || target-amd -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage diff --git a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp index c40d2bf557cd4..8ec486a83d485 100644 --- a/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp +++ b/sycl/test-e2e/Tracing/usm/queue_single_task_released_pointer.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: windows || hip -// RUN: %{build} -fsycl-sycl-decompose-functor -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: not --crash env SYCL_TRACE_TERMINATE_ON_WARNING=1 %{run} sycl-trace --verify %t.out | FileCheck %s // Test parameter analysis of USM usage From 47df942561ad33614612d441ba63a568a16a0457 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Fri, 2 May 2025 21:48:08 +0100 Subject: [PATCH 4/6] fix more test --- .../extensions/properties/properties_cache_control.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test/check_device_code/extensions/properties/properties_cache_control.cpp b/sycl/test/check_device_code/extensions/properties/properties_cache_control.cpp index cf853d2a6c7ac..58bf4fd9a084e 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_cache_control.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_cache_control.cpp @@ -82,19 +82,19 @@ void cache_control_read_write_func() { } // CHECK-IR: spir_kernel{{.*}}cache_control_read_hint_func -// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RHINT:.*]] +// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RHINT:.*]] // CHECK-IR: ret void // CHECK-IR: spir_kernel{{.*}}cache_control_read_assertion_func -// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RASSERT:.*]] +// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RASSERT:.*]] // CHECK-IR: ret void // CHECK-IR: spir_kernel{{.*}}cache_control_write_hint_func -// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[WHINT:.*]] +// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[WHINT:.*]] // CHECK-IR: ret void // CHECK-IR: spir_kernel{{.*}}cache_control_read_write_func -// CHECK-IR: {{.*}}addrspacecast ptr addrspace(1){{.*}}!spirv.Decorations [[RWHINT:.*]] +// CHECK-IR: {{.*}}load ptr addrspace(4), ptr{{.*}}!spirv.Decorations [[RWHINT:.*]] // CHECK-IR: ret void // CHECK-IR: [[RHINT]] = !{[[RHINT1:.*]], [[RHINT2:.*]], [[RHINT3:.*]]} From f617c4a19e81d66f71b2942abc35f4a882c00a00 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Mon, 5 May 2025 16:18:32 +0100 Subject: [PATCH 5/6] work around tests --- sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp | 4 +++- sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp | 4 +++- sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp | 4 +++- sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp | 4 +++- sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp | 4 +++- sycl/test-e2e/XPTI/basic_event_collection_linux.cpp | 2 +- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp b/sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp index 940e6f7793256..afbc09c862be2 100644 --- a/sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp +++ b/sycl/test-e2e/ESIMD/lsc/lsc_load_store_2d_smoke.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// // REQUIRES: arch-intel_gpu_pvc // REQUIRES-INTEL-DRIVER: lin: 30508 -// RUN: %{build} -o %t.out +// Shouldn't have to use -fsycl-decompose-functor, +// See https://github.com/intel/llvm-test-suite/issues/18317 +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Basic test for new lsc_load_2d/lsc_store_2d API. diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp b/sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp index ff331a421ccef..ec6f7c480cee1 100644 --- a/sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp +++ b/sycl/test-e2e/ESIMD/unified_memory_api/scatter_usm.cpp @@ -5,7 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===------------------------------------------------------------------===// -// RUN: %{build} -fsycl-device-code-split=per_kernel -D__ESIMD_GATHER_SCATTER_LLVM_IR -o %t.out +// Shouldn't have to use -fsycl-decompose-functor, +// See https://github.com/intel/llvm-test-suite/issues/18317 +// RUN: %{build} -fsycl-device-code-split=per_kernel -fsycl-decompose-functor -D__ESIMD_GATHER_SCATTER_LLVM_IR -o %t.out // RUN: %{run} %t.out // The test verifies esimd::scatter() functions accepting USM pointer diff --git a/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp b/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp index f3239439a594d..cd450ca85811c 100644 --- a/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp +++ b/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba.cpp @@ -5,7 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// RUN: %{build} -o %t.out +// Shouldn't have to use -fsycl-decompose-functor, +// See https://github.com/intel/llvm-test-suite/issues/18317 +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // // The test checks functionality of the gather_rgba/scatter_rgba USM-based ESIMD diff --git a/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp b/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp index 4b0bbcf6baada..1f5dedb1e7502 100644 --- a/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp +++ b/sycl/test-e2e/ESIMD/usm_gather_scatter_rgba_64.cpp @@ -5,7 +5,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// RUN: %{build} -o %t.out +// Shouldn't have to use -fsycl-decompose-functor, +// See https://github.com/intel/llvm-test-suite/issues/18317 +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // // The test checks functionality of the gather_rgba/scatter_rgba USM-based ESIMD diff --git a/sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp b/sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp index d46907dae5bdc..ef5d2e598e12c 100644 --- a/sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp +++ b/sycl/test-e2e/ESIMD/vadd_raw_send_gen12.cpp @@ -6,7 +6,9 @@ // //===---------------------------------------------------------===// // REQUIRES: gpu-intel-gen12 -// RUN: %{build} -o %t.out +// Shouldn't have to use -fsycl-decompose-functor, +// See https://github.com/intel/llvm-test-suite/issues/18317 +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out #include "esimd_test_utils.hpp" diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 4dfe5928bd5ee..f899caf6cfbca 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -1,6 +1,6 @@ // REQUIRES: xptifw, opencl, cpu, linux // RUN: %build_collector -// RUN: %{build} -o %t.out +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: env UR_ENABLE_LAYERS=UR_LAYER_TRACING env XPTI_TRACE_ENABLE=1 env XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher env XPTI_SUBSCRIBERS=%t_collector.dll %{run} %t.out | FileCheck %s #include "basic_event_collection.inc" From 8c397b717f3b48aeddb81572681475e32d0b45c5 Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Thu, 5 Jun 2025 16:16:06 +0100 Subject: [PATCH 6/6] WIP --- clang/lib/Sema/SemaSYCL.cpp | 17 ++-- .../test/CodeGenSYCL/bool-kernel-argument.cpp | 8 +- .../free_function_kernel_params.cpp | 10 +-- .../generated-types-initialization.cpp | 30 ++----- .../test/CodeGenSYCL/infer-address-spaces.cpp | 4 +- clang/test/CodeGenSYCL/inheritance.cpp | 2 +- clang/test/CodeGenSYCL/intel-restrict.cpp | 2 +- .../kernel-arg-accessor-pointer.cpp | 5 +- .../CodeGenSYCL/kernel-device-space-arg.cpp | 3 +- clang/test/CodeGenSYCL/kernel-handler.cpp | 4 +- .../CodeGenSYCL/kernel-param-pod-array.cpp | 82 +------------------ .../test/CodeGenSYCL/kernel_binding_decls.cpp | 22 +---- .../nontrivial_device_copyable.cpp | 2 +- .../test/CodeGenSYCL/pointers-in-structs.cpp | 2 +- clang/test/CodeGenSYCL/spir-enum.cpp | 2 +- .../test/CodeGenSYCL/sycl-intelfpga-field.cpp | 2 +- .../CodeGenSYCL/sycl_unaliased_property.cpp | 2 +- clang/test/CodeGenSYCL/union-kernel-param.cpp | 2 +- clang/test/CodeGenSYCL/usm-int-header.cpp | 4 +- clang/test/SemaSYCL/array-kernel-param.cpp | 2 +- .../SemaSYCL/binding_decl_lambda_nullptr.cpp | 2 +- .../SemaSYCL/built-in-type-kernel-arg.cpp | 2 +- clang/test/SemaSYCL/decomposition.cpp | 2 +- .../free_function_array_kernel_param.cpp | 2 +- .../SemaSYCL/free_function_kernel_params.cpp | 2 +- clang/test/SemaSYCL/half-kernel-arg.cpp | 2 +- clang/test/SemaSYCL/inheritance.cpp | 2 +- clang/test/SemaSYCL/kernel-arg-opt-report.cpp | 4 +- clang/test/SemaSYCL/kernel-handler.cpp | 2 +- clang/test/SemaSYCL/union-kernel-param.cpp | 2 +- clang/test/SemaSYCL/union-kernel-param1.cpp | 2 +- clang/test/SemaSYCL/union-kernel-param2.cpp | 2 +- .../RecordReplay/add_nodes_after_finalize.cpp | 5 +- 33 files changed, 60 insertions(+), 178 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index a7075340fce5b..6dce61ac4eb79 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -318,10 +318,6 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, SYCLBuiltinBaseTypeExpr(Loc, SourceTy, Idx, BaseTy); } -/// Returns true if the target requires a new type. -/// This happens if a pointer to generic cannot be passed -static bool targetRequiresNewType(ASTContext &Context) { return false; } - // This information is from Section 4.13 of the SYCL spec // https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf // This function returns false if the math lib function @@ -1567,13 +1563,14 @@ class KernelObjVisitor { public: KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {} - static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) { + static bool useTopLevelKernelObj(SemaSYCL &SemaSYCLRef, + const CXXRecordDecl *KernelObj) { // If the kernel is empty, "decompose" it so we don't generate arguments. if (KernelObj->isEmpty()) return false; // FIXME: Workaround to not change large number of tests // this is covered by the test below. - if (targetRequiresNewType(KernelObj->getASTContext())) + if (SemaSYCLRef.getLangOpts().SYCLDecomposeStruct) return false; if (KernelObj->hasAttr() || KernelObj->hasAttr()) @@ -1612,7 +1609,7 @@ class KernelObjVisitor { template void VisitKernelRecord(const CXXRecordDecl *KernelObj, QualType KernelFunctorTy, HandlerTys &...Handlers) { - if (!useTopLevelKernelObj(KernelObj)) { + if (!useTopLevelKernelObj(SemaSYCLRef, KernelObj)) { VisitRecordBases(KernelObj, Handlers...); VisitRecordFields(KernelObj, Handlers...); } else { @@ -2294,12 +2291,12 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { } bool handlePointerType(FieldDecl *, QualType) final { - PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); + PointerStack.back() = SemaSYCLRef.getLangOpts().SYCLDecomposeStruct; return true; } bool handlePointerType(ParmVarDecl *, QualType) final { - PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); + PointerStack.back() = SemaSYCLRef.getLangOpts().SYCLDecomposeStruct; return true; } @@ -4146,7 +4143,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, CXXMethodDecl *CallOperator) : SyclKernelFieldHandler(S), - UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(KernelObj)), + UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(S, KernelObj)), DeclCreator(DC), KernelObjClone(UseTopLevelKernelObj ? nullptr diff --git a/clang/test/CodeGenSYCL/bool-kernel-argument.cpp b/clang/test/CodeGenSYCL/bool-kernel-argument.cpp index a75af985d46de..d0269d012707d 100644 --- a/clang/test/CodeGenSYCL/bool-kernel-argument.cpp +++ b/clang/test/CodeGenSYCL/bool-kernel-argument.cpp @@ -8,11 +8,9 @@ int main() { bool test = false; sycl::queue q; - // CHECK: @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11test_kernel(i8 {{.*}} [[ARG:%[A-Za-z_0-9]*]] - // CHECK: %__SYCLKernel = alloca - // CHECK: %test = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %__SYCLKernel.ascast - // CHECK: store i8 %{{.*}}, ptr addrspace(4) %test - // CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv + // CHECK: @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11test_kernel(ptr {{.*}} [[ARG:%[A-Za-z_0-9]*]] + // CHECK: [[ARG_CAST:%[A-Za-z_0-9.]*]] = addrspacecast ptr [[ARG]] to ptr addrspace(4) + // CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv({{.*}} [[ARG_CAST]]) // // CHECK: define {{.*}} @_Z9take_boolb(i1 q.submit([&](sycl::handler &h) { diff --git a/clang/test/CodeGenSYCL/free_function_kernel_params.cpp b/clang/test/CodeGenSYCL/free_function_kernel_params.cpp index 48ddd76ec99f3..24f9791bd978c 100644 --- a/clang/test/CodeGenSYCL/free_function_kernel_params.cpp +++ b/clang/test/CodeGenSYCL/free_function_kernel_params.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -triple spir64 \ +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-decompose-functor -triple spir64 \ // RUN: -emit-llvm %s -o - | FileCheck %s // This test checks parameter IR generation for free functions with parameters // of non-decomposed struct type, work group memory type, dynamic work group memory type @@ -50,13 +50,9 @@ template void ff_6(KArgWithPtrArray KArg); // CHECK: %struct.NoPointers = type { i32 } // CHECK: %struct.Pointers = type { ptr addrspace(4), ptr addrspace(4) } // CHECK: %struct.Agg = type { %struct.NoPointers, i32, ptr addrspace(4), %struct.Pointers } -// CHECK: %struct.__generated_Pointers = type { ptr addrspace(1), ptr addrspace(1) } -// CHECK: %struct.__generated_Agg = type { %struct.NoPointers, i32, ptr addrspace(1), %struct.__generated_Pointers.4 } -// CHECK: %struct.__generated_Pointers.4 = type { ptr addrspace(1), ptr addrspace(1) } -// CHECK: %struct.__generated_KArgWithPtrArray = type { [3 x ptr addrspace(1)], [3 x i32], [3 x i32] } // CHECK: %struct.KArgWithPtrArray = type { [3 x ptr addrspace(4)], [3 x i32], [3 x i32] } -// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel{{.*}}(ptr noundef byval(%struct.NoPointers) align 4 %__arg_S1, ptr noundef byval(%struct.__generated_Pointers) align 8 %__arg_S2, ptr noundef byval(%struct.__generated_Agg) align 8 %__arg_S3) -// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_6{{.*}}(ptr noundef byval(%struct.__generated_KArgWithPtrArray) align 8 %__arg_KArg) +// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel{{.*}}(ptr noundef byval(%struct.NoPointers) align 4 %__arg_S1, ptr noundef byval(%struct.Pointers) align 8 %__arg_S2, ptr noundef byval(%struct.Agg) align 8 %__arg_S3) +// CHECK: define dso_local spir_kernel void @{{.*}}__sycl_kernel_ff_6{{.*}}(ptr noundef byval(%struct.KArgWithPtrArray) align 8 %__arg_KArg) __attribute__((sycl_device)) [[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]] diff --git a/clang/test/CodeGenSYCL/generated-types-initialization.cpp b/clang/test/CodeGenSYCL/generated-types-initialization.cpp index 91c13fa271222..b26ce3bfe3e08 100644 --- a/clang/test/CodeGenSYCL/generated-types-initialization.cpp +++ b/clang/test/CodeGenSYCL/generated-types-initialization.cpp @@ -38,34 +38,14 @@ int main() { }); return 0; } -// CHECK: define dso_local spir_kernel void @{{.*}}basic(ptr noundef byval(%struct.__generated_B) align 8 %_arg_Obj) -// -// Kernel object clone. -// CHECK: %[[K:[a-zA-Z0-9_.]+]] = alloca %class.anon -// CHECK: %[[K_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[K]] to ptr addrspace(4) -// -// Argument reference. -// CHECK: %[[Arg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_Obj to ptr addrspace(4) -// -// Initialization. -// CHECK: %[[GEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %[[K_as_cast]], i32 0, i32 0 -// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[GEP]], ptr addrspace(4) align 8 %[[Arg_ref]], i64 16, i1 false) +// CHECK: define dso_local spir_kernel void @{{.*}}basic(ptr noundef byval(%class.anon) align 8 %_arg__sycl_functor) // // Kernel body call. -// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[K_as_cast]]) +// CHECK: %[[Obj_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__sycl_functor to ptr addrspace(4) +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[Obj_as_cast]]) -// CHECK: define dso_local spir_kernel void @{{.*}}nns(ptr noundef byval(%struct.__generated_B.0) align 8 %_arg_NNSObj) -// -// Kernel object clone. -// CHECK: %[[NNSK:[a-zA-Z0-9_.]+]] = alloca %class.anon.2 -// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[NNSK]] to ptr addrspace(4) -// -// Argument reference. -// CHECK: %[[NNSArg_ref:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg_NNSObj to ptr addrspace(4) -// -// Initialization. -// CHECK: %[[NNSGEP:[a-zA-Z0-9_.]+]] = getelementptr inbounds nuw %class.anon.2, ptr addrspace(4) %[[NNSK_as_cast]], i32 0, i32 0 -// CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 8 %[[NNSGEP]], ptr addrspace(4) align 8 %[[NNSArg_ref]], i64 16, i1 false) +// CHECK: define dso_local spir_kernel void @{{.*}}nns(ptr noundef byval(%class.anon.0) align 8 %_arg__sycl_functor) // // Kernel body call. +// CHECK: %[[NNSK_as_cast:[a-zA-Z0-9_.]+]] = addrspacecast ptr %_arg__sycl_functor to ptr addrspace(4) // CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) %[[NNSK_as_cast]]) diff --git a/clang/test/CodeGenSYCL/infer-address-spaces.cpp b/clang/test/CodeGenSYCL/infer-address-spaces.cpp index 995dc52870ee6..698dfe743f7ac 100644 --- a/clang/test/CodeGenSYCL/infer-address-spaces.cpp +++ b/clang/test/CodeGenSYCL/infer-address-spaces.cpp @@ -17,5 +17,5 @@ void foo(const float *usm_in, float* usm_out) { // No addrspacecast before loading and storing values // CHECK-NOT: addrspacecast -// CHECK: [[VAL:%.*]] = load float, ptr addrspace(1) -// CHECK: store float [[VAL]], ptr addrspace(1) +// CHECK: [[VAL:%.*]] = load float, ptr addrspace(4) +// CHECK: store float [[VAL]], ptr addrspace(4) diff --git a/clang/test/CodeGenSYCL/inheritance.cpp b/clang/test/CodeGenSYCL/inheritance.cpp index fc9e0957a7010..8b509fa57c0bc 100644 --- a/clang/test/CodeGenSYCL/inheritance.cpp +++ b/clang/test/CodeGenSYCL/inheritance.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/CodeGenSYCL/intel-restrict.cpp b/clang/test/CodeGenSYCL/intel-restrict.cpp index e25abdcf14476..9a7a4a0c800a2 100644 --- a/clang/test/CodeGenSYCL/intel-restrict.cpp +++ b/clang/test/CodeGenSYCL/intel-restrict.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp b/clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp index db432d97e5b94..ab773adf7a536 100644 --- a/clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp +++ b/clang/test/CodeGenSYCL/kernel-arg-accessor-pointer.cpp @@ -129,14 +129,13 @@ int main() { // Check kernel_C parameters // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_C -// CHECK-SAME: i32 noundef [[MEM_ARG1:%[a-zA-Z0-9_]+]] +// CHECK-SAME: ptr noundef byval(%class.anon.3) align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]] // CHECK-NOT: kernel_arg_runtime_aligned // CHECK-NOT: kernel_arg_exclusive_ptr // Check usm_ptr parameters // CHECK: define {{.*}}spir_kernel void @{{.*}}usm_ptr -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]], -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[MEM_ARG1:%[a-zA-Z0-9_]+]] +// CHECK-SAME: ptr noundef byval(%class.anon.4) align 8 [[MEM_ARG1:%[a-zA-Z0-9_]+]] // CHECK-NOT: kernel_arg_runtime_aligned // CHECK-NOT: kernel_arg_exclusive_ptr diff --git a/clang/test/CodeGenSYCL/kernel-device-space-arg.cpp b/clang/test/CodeGenSYCL/kernel-device-space-arg.cpp index 36e1adf6404ba..110f91d892dfe 100644 --- a/clang/test/CodeGenSYCL/kernel-device-space-arg.cpp +++ b/clang/test/CodeGenSYCL/kernel-device-space-arg.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm %s -disable-llvm-passes -o - | FileCheck %s -// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function(ptr addrspace(5) {{.*}} ptr addrspace(6) {{.*}} +// CHECK: %class.anon = type { ptr addrspace(5), ptr addrspace(6) } +// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function(ptr noundef byval(%class.anon) {{.*}} #include "Inputs/sycl.hpp" diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index c0fe1c12e934d..350cbd4db3479 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument // (used to handle SYCL 2020 specialization constants) is passed diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp index 180f65fda9a6b..452f74fe32082 100644 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -46,88 +46,12 @@ int main() { // Check kernel_B parameters // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_B -// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]]) - -// Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4 -// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) - -// Check for Array init loop -// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0 -// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x i32], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0 -// CHECK: br label %[[ARRAYINITBODY:.+]] - -// The loop body itself -// CHECK: [[ARRAYINITBODY]]: -// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITBODY]] ] -// CHECK: %[[TARG_ARRAY_ELEM:.+]] = getelementptr inbounds i32, ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]] -// CHECK: %[[SRC_ELEM:.+]] = getelementptr inbounds nuw [2 x i32], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]] -// CHECK: %[[SRC_VAL:.+]] = load i32, ptr addrspace(4) %[[SRC_ELEM]] -// CHECK: store i32 %[[SRC_VAL]], ptr addrspace(4) %[[TARG_ARRAY_ELEM]] -// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1 -// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2 -// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]] +// CHECK-SAME:(ptr noundef byval(%class.anon) align 4 %[[ARR_ARG:.*]]) // Check kernel_C parameters // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_C -// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) - -// Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) - -// Check for Array init loop -// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class{{.*}}, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0 -// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x %struct{{.*}}.foo], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0 -// CHECK: br label %[[ARRAYINITBODY:.+]] - -// The loop body itself -// CHECK: [[ARRAYINITBODY]]: -// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITBODY]] ] -// CHECK: %[[TARG_ARRAY_ELEM:.+]] = getelementptr inbounds %struct{{.*}}.foo, ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]] -// CHECK: %[[SRC_ELEM:.+]] = getelementptr inbounds nuw [2 x %struct{{.*}}.foo], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]] -// call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %[[TARG_ARRAY_ELEM]], ptr addrspace(4) align %[[SRC_ELEM]], i64 24, i1 false) -// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1 -// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2 -// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]] +// CHECK-SAME:(ptr noundef byval(%class.anon.0) align 4 %[[ARR_ARG:.*]]) // Check kernel_D parameters // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_D -// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]]) - -// Check local lambda object alloca -// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4 -// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4) - -// Check for Array init loop -// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds nuw %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: %[[WRAPPER_PTR:.+]] = getelementptr inbounds nuw %struct{{.*}}.__wrapper_class{{.*}}, ptr addrspace(4) %[[ARR_ARG]].ascast, i32 0, i32 0 -// CHECK: %[[ARRAY_BEGIN:.+]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) %[[LAMBDA_PTR]], i64 0, i64 0 -// CHECK: br label %[[ARRAYINITBODY:.+]] - -// Check Outer loop. -// CHECK: [[ARRAYINITBODY]]: -// CHECK: %[[ARRAYINDEX:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX:.+]], %[[ARRAYINITEND:.+]] ] -// CHECK: %[[TARG_OUTER_ELEM:.+]] = getelementptr inbounds [1 x i32], ptr addrspace(4) %[[ARRAY_BEGIN]], i64 %[[ARRAYINDEX]] -// CHECK: %[[SRC_OUTER_ELEM:.+]] = getelementptr inbounds nuw [2 x [1 x i32]], ptr addrspace(4) %[[WRAPPER_PTR]], i64 0, i64 %[[ARRAYINDEX]] -// CHECK: %[[ARRAY_BEGIN_INNER:.+]] = getelementptr inbounds [1 x i32], ptr addrspace(4) %[[TARG_OUTER_ELEM]], i64 0, i64 0 -// CHECK: br label %[[ARRAYINITBODY_INNER:.+]] - -// Check Inner Loop -// CHECK: [[ARRAYINITBODY_INNER]]: -// CHECK: %[[ARRAYINDEX_INNER:.+]] = phi i64 [ 0, %{{.*}} ], [ %[[NEXTINDEX_INNER:.+]], %[[ARRAYINITBODY_INNER:.+]] ] -// CHECK: %[[TARG_INNER_ELEM:.+]] = getelementptr inbounds i32, ptr addrspace(4) %[[ARRAY_BEGIN_INNER]], i64 %[[ARRAYINDEX_INNER]] -// CHECK: %[[SRC_INNER_ELEM:.+]] = getelementptr inbounds nuw [1 x i32], ptr addrspace(4) %[[SRC_OUTER_ELEM]], i64 0, i64 %[[ARRAYINDEX_INNER]] -// CHECK: %[[SRC_LOAD:.+]] = load i32, ptr addrspace(4) %[[SRC_INNER_ELEM]] -// CHECK: store i32 %[[SRC_LOAD]], ptr addrspace(4) %[[TARG_INNER_ELEM]] -// CHECK: %[[NEXTINDEX_INNER]] = add nuw i64 %[[ARRAYINDEX_INNER]], 1 -// CHECK: %[[ISDONE_INNER:.+]] = icmp eq i64 %[[NEXTINDEX_INNER]], 1 -// CHECK: br i1 %[[ISDONE_INNER]], label %[[ARRAYINITEND]], label %[[ARRAYINITBODY_INNER]] - -// Check Inner loop 'end' -// CHECK: [[ARRAYINITEND]]: -// CHECK: %[[NEXTINDEX]] = add nuw i64 %[[ARRAYINDEX]], 1 -// CHECK: %[[ISDONE:.+]] = icmp eq i64 %[[NEXTINDEX]], 2 -// CHECK: br i1 %[[ISDONE]], label %{{.*}}, label %[[ARRAYINITBODY]] +// CHECK-SAME:(ptr noundef byval(%class.anon.1) align 4 %[[ARR_ARG:.*]]) diff --git a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp index 6af15b3aaacda..7e7eb9e914a6f 100644 --- a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp @@ -19,28 +19,12 @@ void foo() { // CHECK: %class.anon = type { i32, float } // Check the sycl kernel arguments - one int and one float parameter -// CHECK: define {{.*}} spir_kernel void @{{.*}}foov{{.*}}(i32 {{.*}} %_arg_x, float {{.*}} %_arg_f2) +// CHECK: define {{.*}} spir_kernel void @{{.*}}foov{{.*}}(ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor) // CHECK: entry: - -// Check alloca of the captured types -// CHECK: %_arg_x.addr = alloca i32, align 4 -// CHECK: %_arg_f2.addr = alloca float, align 4 -// CHECK: %__SYCLKernel = alloca %class.anon, align 4 - -// Copy the parameters into the alloca-ed addresses -// CHECK: store i32 %_arg_x, ptr addrspace(4) %_arg_x.addr -// CHECK: store float %_arg_f2, ptr addrspace(4) %_arg_f2.addr - -// Store the int and the float into the struct created -// CHECK: %x = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %__SYCLKernel{{.*}}, i32 0, i32 0 -// CHECK: %0 = load i32, ptr addrspace(4) %_arg_x.addr -// CHECK: store i32 %0, ptr addrspace(4) %x -// CHECK: %f2 = getelementptr inbounds nuw %class.anon, ptr addrspace(4) %__SYCLKernel{{.*}}, i32 0, i32 1 -// CHECK: %1 = load float, ptr addrspace(4) %_arg_f2.addr -// CHECK: store float %1, ptr addrspace(4) %f2 +// CHECK: %_arg__sycl_functor.ascast = addrspacecast ptr %_arg__sycl_functor to ptr addrspace(4) // Call the lambda -// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) {{.*}} %__SYCLKernel{{.*}}) +// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) {{.*}} %_arg__sycl_functor.ascast) // CHECK: ret void // Check the lambda call diff --git a/clang/test/CodeGenSYCL/nontrivial_device_copyable.cpp b/clang/test/CodeGenSYCL/nontrivial_device_copyable.cpp index 04d8f32582f5c..7c4e27aedc17a 100644 --- a/clang/test/CodeGenSYCL/nontrivial_device_copyable.cpp +++ b/clang/test/CodeGenSYCL/nontrivial_device_copyable.cpp @@ -27,7 +27,7 @@ int main() { }); } -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name(ptr noundef byval(%struct.NontriviallyCopyable) +// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name(ptr noundef byval(%class.anon) // CHECK-NOT: define {{.*}}spir_func void @{{.*}}device_func{{.*}}({{.*}}byval(%struct.NontriviallyCopyable) // CHECK: define {{.*}}spir_func void @_Z11device_func20NontriviallyCopyable(ptr noundef %X) // CHECK: %X.indirect_addr = alloca ptr addrspace(4) diff --git a/clang/test/CodeGenSYCL/pointers-in-structs.cpp b/clang/test/CodeGenSYCL/pointers-in-structs.cpp index 690ce34e8980e..eafdf5d8c636e 100644 --- a/clang/test/CodeGenSYCL/pointers-in-structs.cpp +++ b/clang/test/CodeGenSYCL/pointers-in-structs.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s // This test checks that compiler generates correct address spaces for pointer // kernel arguments that are wrapped by struct. Generated class should retain diff --git a/clang/test/CodeGenSYCL/spir-enum.cpp b/clang/test/CodeGenSYCL/spir-enum.cpp index bd7dbecc6d5d2..2ba5baeaf0f74 100644 --- a/clang/test/CodeGenSYCL/spir-enum.cpp +++ b/clang/test/CodeGenSYCL/spir-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -fsycl-decompose-functor -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/sycl-intelfpga-field.cpp b/clang/test/CodeGenSYCL/sycl-intelfpga-field.cpp index ca2d9d572c522..d0311a813f245 100644 --- a/clang/test/CodeGenSYCL/sycl-intelfpga-field.cpp +++ b/clang/test/CodeGenSYCL/sycl-intelfpga-field.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -fsycl-decompose-functor -emit-llvm %s -o - | FileCheck %s // This test checks that proper IR is generated for kernel field initialization, including // 4 cases: diff --git a/clang/test/CodeGenSYCL/sycl_unaliased_property.cpp b/clang/test/CodeGenSYCL/sycl_unaliased_property.cpp index f5ad55ae4f464..02fb9fdaed64a 100644 --- a/clang/test/CodeGenSYCL/sycl_unaliased_property.cpp +++ b/clang/test/CodeGenSYCL/sycl_unaliased_property.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor %s -emit-llvm -triple spir64-unknown-unknown -o - | FileCheck %s struct __attribute__((sycl_special_class)) [[__sycl_detail__::sycl_type(annotated_arg)]] diff --git a/clang/test/CodeGenSYCL/union-kernel-param.cpp b/clang/test/CodeGenSYCL/union-kernel-param.cpp index 0909e4720935d..a4150971291e5 100644 --- a/clang/test/CodeGenSYCL/union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -fsycl-decompose-functor -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s // This test checks a kernel argument that is union with both array and non-array fields. diff --git a/clang/test/CodeGenSYCL/usm-int-header.cpp b/clang/test/CodeGenSYCL/usm-int-header.cpp index 080772b4b4a60..2a704f0d8d311 100644 --- a/clang/test/CodeGenSYCL/usm-int-header.cpp +++ b/clang/test/CodeGenSYCL/usm-int-header.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s -// RUN: %clang_cc1 -fsycl-is-device -fsycl-int-header=%t.h %s -o %t.out +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -fsycl-int-header=%t.h %s -o %t.out // RUN: FileCheck -input-file=%t.h %s --check-prefix=INT-HEADER // INT-HEADER:{ kernel_param_kind_t::kind_pointer, 8, 0 }, diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index d9b37c88b28dd..3d0951a6ca69b 100644 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s // This test checks that compiler generates correct kernel arguments for // arrays, Accessor arrays, and structs containing Accessors. diff --git a/clang/test/SemaSYCL/binding_decl_lambda_nullptr.cpp b/clang/test/SemaSYCL/binding_decl_lambda_nullptr.cpp index 9aca54588f388..a0db32dc32e9f 100644 --- a/clang/test/SemaSYCL/binding_decl_lambda_nullptr.cpp +++ b/clang/test/SemaSYCL/binding_decl_lambda_nullptr.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -fsyntax-only %s -verify=device -ast-dump | FileCheck %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-decompose-functor -std=c++20 -fsyntax-only %s -verify=device -ast-dump | FileCheck %s // RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-host -std=c++20 -fsyntax-only %s -verify=host // This test checks that when a binding declaration is captured that diff --git a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp index 5f3da924e982d..64a4c0ca56b6d 100644 --- a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp +++ b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -internal-isystem %S/Inputs -sycl-std=2020 -ast-dump %s | FileCheck %s // This test checks that compiler generates correct initialization for arguments // that have struct or built-in type inside the OpenCL kernel diff --git a/clang/test/SemaSYCL/decomposition.cpp b/clang/test/SemaSYCL/decomposition.cpp index f4699c3bd9ee8..93511187fdf68 100644 --- a/clang/test/SemaSYCL/decomposition.cpp +++ b/clang/test/SemaSYCL/decomposition.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s // This test checks that the compiler decomposes structs containing special types only // (i.e. accessor/stream/sampler etc) and all others are passed without decomposition diff --git a/clang/test/SemaSYCL/free_function_array_kernel_param.cpp b/clang/test/SemaSYCL/free_function_array_kernel_param.cpp index cbb62eb59fa89..3480d6cfb5197 100755 --- a/clang/test/SemaSYCL/free_function_array_kernel_param.cpp +++ b/clang/test/SemaSYCL/free_function_array_kernel_param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ast-dump \ +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-decompose-functor -ast-dump \ // RUN: %s -o - | FileCheck %s // This test checks parameter rewriting for free functions with parameters diff --git a/clang/test/SemaSYCL/free_function_kernel_params.cpp b/clang/test/SemaSYCL/free_function_kernel_params.cpp index e0f1e0665d551..671f7ed52bfe1 100644 --- a/clang/test/SemaSYCL/free_function_kernel_params.cpp +++ b/clang/test/SemaSYCL/free_function_kernel_params.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ast-dump \ +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-decompose-functor -ast-dump \ // RUN: %s -o - | FileCheck %s // This test checks parameter rewriting for free functions with parameters // of type scalar, pointer, non-decomposed struct, work group memory, dynamic work group memory diff --git a/clang/test/SemaSYCL/half-kernel-arg.cpp b/clang/test/SemaSYCL/half-kernel-arg.cpp index b2a301782d2e0..f6c77b534adcd 100644 --- a/clang/test/SemaSYCL/half-kernel-arg.cpp +++ b/clang/test/SemaSYCL/half-kernel-arg.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -internal-isystem %S/Inputs -ast-dump -sycl-std=2020 %s | FileCheck %s // This test checks that compiler generates correct initialization for arguments // that have sycl::half type inside the OpenCL kernel diff --git a/clang/test/SemaSYCL/inheritance.cpp b/clang/test/SemaSYCL/inheritance.cpp index c78c0e40fb4ec..58fd28cb8e0cd 100644 --- a/clang/test/SemaSYCL/inheritance.cpp +++ b/clang/test/SemaSYCL/inheritance.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -ast-dump %s | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp index 92f19374818ed..96f1355cef827 100644 --- a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp +++ b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown -fsycl-is-device \ +// RUN: %clang_cc1 -triple spir64-unknown-unknown -fsycl-is-device -fsycl-decompose-functor \ // RUN: -emit-llvm-bc %s -o %t-host.bc -opt-record-file %t-host.yaml // RUN: FileCheck -check-prefix=SPIR --input-file %t-host.yaml %s -// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fsycl-is-device \ +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fsycl-is-device -fsycl-decompose-functor \ // RUN: -emit-llvm-bc %s -o %t-host.bc -opt-record-file %t-host.yaml // RUN: FileCheck -check-prefix=NVPTX --input-file %t-host.yaml %s // The test generates remarks about the kernel argument, their location and type diff --git a/clang/test/SemaSYCL/kernel-handler.cpp b/clang/test/SemaSYCL/kernel-handler.cpp index ec9644a3bec24..e73c24f6d89e3 100644 --- a/clang/test/SemaSYCL/kernel-handler.cpp +++ b/clang/test/SemaSYCL/kernel-handler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT // This test checks that the compiler handles kernel_handler type (for diff --git a/clang/test/SemaSYCL/union-kernel-param.cpp b/clang/test/SemaSYCL/union-kernel-param.cpp index 312c0b8f05009..63ab270edac67 100644 --- a/clang/test/SemaSYCL/union-kernel-param.cpp +++ b/clang/test/SemaSYCL/union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -ast-dump %s | FileCheck %s // This test checks that compiler generates correct kernel arguments for // union without array. diff --git a/clang/test/SemaSYCL/union-kernel-param1.cpp b/clang/test/SemaSYCL/union-kernel-param1.cpp index aef01a26b140a..eaa04facb4ed1 100644 --- a/clang/test/SemaSYCL/union-kernel-param1.cpp +++ b/clang/test/SemaSYCL/union-kernel-param1.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -ast-dump %s | FileCheck %s // This test checks that compiler generates correct kernel arguments for // unions containing Arrays. diff --git a/clang/test/SemaSYCL/union-kernel-param2.cpp b/clang/test/SemaSYCL/union-kernel-param2.cpp index 920d73603e6c1..719a692008650 100644 --- a/clang/test/SemaSYCL/union-kernel-param2.cpp +++ b/clang/test/SemaSYCL/union-kernel-param2.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -internal-isystem %S/Inputs -ast-dump %s | FileCheck %s // This test checks that compiler generates correct kernel arguments for // a struct-with-an-array-of-unions and a array-of-struct-with-a-union. diff --git a/sycl/test-e2e/Graph/RecordReplay/add_nodes_after_finalize.cpp b/sycl/test-e2e/Graph/RecordReplay/add_nodes_after_finalize.cpp index 1a3615f167e90..ee648113516a4 100644 --- a/sycl/test-e2e/Graph/RecordReplay/add_nodes_after_finalize.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/add_nodes_after_finalize.cpp @@ -1,4 +1,7 @@ -// RUN: %{build} -o %t.out +// By default functors are no longer decomposed preventing the use of set_arg in +// this test, -fsycl-decompose-functor is used to force the old behavior +// +// RUN: %{build} -fsycl-decompose-functor -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}