Skip to content

Commit cbfe079

Browse files
committed
[UR][Graph] in-order USM Memcpy regression CTS test
Create a UR CTS test based on the E2E test [Graph/RecordReplay/usm_copy_in_order.cpp](https://github.com/intel/llvm/blob/sycl/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp) to help debug #18169
1 parent c997f77 commit cbfe079

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_conformance_test_with_kernels_environment(exp_command_buffer
2121
update/event_sync.cpp
2222
update/kernel_event_sync.cpp
2323
update/local_memory_update.cpp
24+
regression/usm_copy.cpp
2425
)
2526

2627
add_subdirectory(native-command)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "../fixtures.h"
8+
9+
// UR reproducer for SYCL-Graph E2E test "RecordReplay/usm_copy_in_order.cpp"
10+
// Note that the kernel code is different, in that this test uses the
11+
// saxpy_usm kernel, but the sequence of operations is the same.
12+
struct urCommandBufferUSMCopyInOrderTest
13+
: uur::command_buffer::urCommandBufferExpExecutionTest {
14+
virtual void SetUp() override {
15+
program_name = "saxpy_usm";
16+
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp());
17+
18+
// Create in-order command-buffer
19+
ur_exp_command_buffer_desc_t desc{
20+
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, // stype
21+
nullptr, // pNext
22+
false, // isUpdatable
23+
true, // isInOrder
24+
false // enableProfiling
25+
};
26+
ASSERT_SUCCESS(
27+
urCommandBufferCreateExp(context, device, &desc, &in_order_cmd_buf));
28+
ASSERT_NE(in_order_cmd_buf, nullptr);
29+
30+
// Create 4 device USM allocations and initialize elements to list index
31+
for (unsigned i = 0; i < device_ptrs.size(); i++) {
32+
auto &device_ptr = device_ptrs[i];
33+
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
34+
allocation_size, &device_ptr));
35+
ASSERT_NE(device_ptr, nullptr);
36+
37+
uint32_t pattern = i;
38+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptr, sizeof(pattern),
39+
&pattern, allocation_size, 0, nullptr,
40+
nullptr));
41+
}
42+
ASSERT_SUCCESS(urQueueFinish(queue));
43+
44+
// Index 0 is output
45+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[0]));
46+
// Index 1 is A
47+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A));
48+
// Index 2 is X
49+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 2, nullptr, device_ptrs[1]));
50+
// Index 3 is Y
51+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[2]));
52+
}
53+
54+
virtual void TearDown() override {
55+
for (auto &device_ptr : device_ptrs) {
56+
if (device_ptr) {
57+
EXPECT_SUCCESS(urUSMFree(context, device_ptr));
58+
}
59+
}
60+
if (in_order_cmd_buf) {
61+
EXPECT_SUCCESS(urCommandBufferReleaseExp(in_order_cmd_buf));
62+
}
63+
64+
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::TearDown());
65+
}
66+
67+
ur_exp_command_buffer_handle_t in_order_cmd_buf = nullptr;
68+
static constexpr size_t global_size = 10;
69+
static constexpr size_t global_offset = 0;
70+
static constexpr size_t n_dimensions = 1;
71+
static constexpr size_t allocation_size = sizeof(uint32_t) * global_size;
72+
static constexpr uint32_t A = 42;
73+
std::array<void *, 4> device_ptrs = {nullptr, nullptr, nullptr, nullptr};
74+
};
75+
76+
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCommandBufferUSMCopyInOrderTest);

// Submits one eager SAXPY kernel, then records kernel -> memcpy -> kernel ->
// memcpy -> device-to-host memcpy into the in-order command-buffer, enqueues
// it once, and checks that every element copied back to the host equals
// A * (A * 1 + (A * 1 + 2)) + (A * 1 + 2).
//
// D[i] below denotes device_ptrs[i]; every element of D[i] starts out as i.
TEST_P(urCommandBufferUSMCopyInOrderTest, Success) {
  // Eager kernel enqueue, deliberately not waited on before recording:
  //   D[0] = A * D[1] + D[2]
  //   D[0] = 42 * 1 + 2
  //   D[0] = 44
  ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
                                       &global_offset, &global_size, nullptr, 0,
                                       nullptr, nullptr));

  // Sync point of the most recently appended command. Later commands wait on
  // it to express a linear dependency chain while recording.
  ur_exp_command_buffer_sync_point_t last_sync_point;

  // First SAXPY kernel node. Rebind Y to D[0] and the output to D[3]:
  //   D[3] = A * D[1] + D[0]
  //   D[3] = 42 * 1 + 44
  //   D[3] = 86
  ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[0]));
  ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[3]));
  ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
      in_order_cmd_buf, kernel, n_dimensions, &global_offset, &global_size,
      nullptr, 0, nullptr, 0, nullptr, 0, nullptr, &last_sync_point, nullptr,
      nullptr));

  // Device-to-device memcpy of the previous node's output into the X operand:
  //   D[1] = 86
  // NOTE(review): unlike the nodes appended after it, this memcpy passes an
  // empty sync-point wait list, so presumably only the in-order property of
  // the command-buffer orders it after the kernel above - confirm intended.
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, device_ptrs[1], device_ptrs[3], allocation_size, 0,
      nullptr, 0, nullptr, &last_sync_point, nullptr, nullptr));

  // Second SAXPY kernel node, same argument binding as the first:
  //   D[3] = A * D[1] + D[0]
  //   D[3] = 42 * 86 + 44
  //   D[3] = 3656
  ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
      in_order_cmd_buf, kernel, n_dimensions, &global_offset, &global_size,
      nullptr, 0, nullptr, 1, &last_sync_point, 0, nullptr, &last_sync_point,
      nullptr, nullptr));

  // Device-to-device memcpy of the previous node's output into the USM
  // allocation that no recorded kernel uses:
  //   D[2] = 3656
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, device_ptrs[2], device_ptrs[3], allocation_size, 1,
      &last_sync_point, 0, nullptr, &last_sync_point, nullptr, nullptr));

  // Device-to-host memcpy of D[2] into a host-side vector for verification.
  std::vector<uint32_t> host_output(global_size);
  ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
      in_order_cmd_buf, host_output.data(), device_ptrs[2], allocation_size, 1,
      &last_sync_point, 0, nullptr, &last_sync_point, nullptr, nullptr));
  ASSERT_SUCCESS(urCommandBufferFinalizeExp(in_order_cmd_buf));

  // Replay the recorded commands once and wait for everything on the queue.
  ASSERT_SUCCESS(
      urEnqueueCommandBufferExp(queue, in_order_cmd_buf, 0, nullptr, nullptr));
  ASSERT_SUCCESS(urQueueFinish(queue));

  // Recompute the expected value on the host, one step per kernel execution.
  constexpr uint32_t eager_result = A * 1 + 2;             // eager submission
  constexpr uint32_t first_node_result = A * 1 + eager_result; // first node
  constexpr uint32_t second_node_result =
      A * first_node_result + eager_result; // second node
  for (const uint32_t element : host_output) {
    ASSERT_EQ(second_node_result, element);
  }
}

0 commit comments

Comments
 (0)