Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit b704d08

Browse files
committed
[SYCL][ESIMD] Add InlineAsm tests
Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 1cf4f4c commit b704d08

File tree

4 files changed

+451
-0
lines changed

4 files changed

+451
-0
lines changed

SYCL/ESIMD/InlineAsm/asm_glb.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//==---------------- asm_glb.cpp - DPC++ ESIMD on-device test -------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
13+
#include "../esimd_test_utils.hpp"
14+
15+
#include <iostream>
16+
#include <sycl/ext/intel/esimd.hpp>
17+
#include <sycl/sycl.hpp>
18+
19+
using namespace sycl;
20+
using namespace sycl::ext::intel::esimd;
21+
22+
ESIMD_PRIVATE ESIMD_REGISTER(0) simd<float, 16> va;
23+
24+
int main(void) {
25+
constexpr unsigned Size = 1024 * 128;
26+
constexpr unsigned VL = 16;
27+
28+
float *A = new float[Size];
29+
float *B = new float[Size];
30+
float *C = new float[Size];
31+
32+
for (unsigned i = 0; i < Size; ++i) {
33+
A[i] = B[i] = i;
34+
C[i] = 0.0f;
35+
}
36+
37+
try {
38+
buffer<float, 1> bufa(A, range<1>(Size));
39+
buffer<float, 1> bufb(B, range<1>(Size));
40+
buffer<float, 1> bufc(C, range<1>(Size));
41+
42+
// We need that many workgroups
43+
range<1> GlobalRange{Size / VL};
44+
45+
// We need that many threads in each group
46+
range<1> LocalRange{1};
47+
48+
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
49+
50+
auto dev = q.get_device();
51+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
52+
53+
auto e = q.submit([&](handler &cgh) {
54+
auto PA = bufa.get_access<access::mode::read>(cgh);
55+
auto PB = bufb.get_access<access::mode::read>(cgh);
56+
auto PC = bufc.get_access<access::mode::write>(cgh);
57+
cgh.parallel_for<class Test>(
58+
GlobalRange * LocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
59+
using namespace sycl::ext::intel::esimd;
60+
unsigned int offset = i * VL * sizeof(float);
61+
va.copy_from(PA, offset);
62+
simd<float, VL> vb;
63+
vb.copy_from(PB, offset);
64+
simd<float, VL> vc;
65+
#ifdef __SYCL_DEVICE_ONLY__
66+
__asm__("add (M1, 16) %0 %1 %2"
67+
: "=rw"(vc.data_ref())
68+
: "rw"(va.data()), "rw"(vb.data()));
69+
#else
70+
vc = va+vb;
71+
#endif
72+
vc.copy_to(PC, offset);
73+
});
74+
});
75+
e.wait();
76+
} catch (sycl::exception const &e) {
77+
std::cout << "SYCL exception caught: " << e.what() << '\n';
78+
79+
delete[] A;
80+
delete[] B;
81+
delete[] C;
82+
return 1;
83+
}
84+
85+
int err_cnt = 0;
86+
87+
for (unsigned i = 0; i < Size; ++i) {
88+
if (A[i] + B[i] != C[i]) {
89+
if (++err_cnt < 10) {
90+
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
91+
<< " + " << B[i] << "\n";
92+
}
93+
}
94+
}
95+
if (err_cnt > 0) {
96+
std::cout << " pass rate: "
97+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
98+
<< (Size - err_cnt) << "/" << Size << ")\n";
99+
}
100+
101+
delete[] A;
102+
delete[] B;
103+
delete[] C;
104+
105+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
106+
return err_cnt > 0 ? 1 : 0;
107+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
//==---------------- asm_simd_mask.cpp - DPC++ ESIMD on-device test
2+
//-------------==//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
// REQUIRES: gpu
10+
// UNSUPPORTED: cuda || hip
11+
// RUN: %clangxx -fsycl %s -o %t.out
12+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
13+
14+
#include "../esimd_test_utils.hpp"
15+
16+
#include <iostream>
17+
#include <sycl/ext/intel/esimd.hpp>
18+
#include <sycl/sycl.hpp>
19+
20+
using namespace sycl;
21+
22+
int main(void) {
23+
constexpr unsigned Size = 1024 * 128;
24+
constexpr unsigned VL = 8;
25+
26+
float *A = new float[Size];
27+
float *B = new float[Size];
28+
float *C = new float[Size];
29+
30+
for (unsigned i = 0; i < Size; ++i) {
31+
A[i] = B[i] = i;
32+
C[i] = 0.0f;
33+
}
34+
35+
try {
36+
buffer<float, 1> bufa(A, range<1>(Size));
37+
buffer<float, 1> bufb(B, range<1>(Size));
38+
buffer<float, 1> bufc(C, range<1>(Size));
39+
40+
// We need that many workgroups
41+
range<1> GlobalRange{Size / VL};
42+
43+
// We need that many threads in each group
44+
range<1> LocalRange{1};
45+
46+
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
47+
48+
auto dev = q.get_device();
49+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
50+
51+
auto e = q.submit([&](handler &cgh) {
52+
auto PA = bufa.get_access<access::mode::read>(cgh);
53+
auto PB = bufb.get_access<access::mode::read>(cgh);
54+
auto PC = bufc.get_access<access::mode::write>(cgh);
55+
cgh.parallel_for<class Test>(
56+
GlobalRange * LocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
57+
using namespace sycl::ext::intel::esimd;
58+
unsigned int offset = i * VL * sizeof(float);
59+
simd<float, VL> va;
60+
va.copy_from(PA, offset);
61+
simd<float, VL> vb;
62+
vb.copy_from(PB, offset);
63+
simd<float, VL> vc;
64+
#ifdef __SYCL_DEVICE_ONLY__
65+
simd_mask<VL> m;
66+
__asm__("mov (M1, 8) %0 0x1010101:v" : "=rw"(m.data_ref()));
67+
__asm__("{\n"
68+
".decl P1 v_type=P num_elts=8\n"
69+
"mov (M1, 8) %0 0x1:ud\n"
70+
"setp (M1, 8) P1 %3\n"
71+
"(P1) add (M1, 8) %0 %1 %2\n"
72+
"}"
73+
: "=rw"(vc.data_ref())
74+
: "rw"(va.data()), "rw"(vb.data()), "rw"(m.data()));
75+
#else
76+
simd_mask<VL> m({1,0,1,0,1,0,1,0});
77+
vc = va+vb;
78+
vc.merge(1, !m);
79+
#endif
80+
vc.copy_to(PC, offset);
81+
});
82+
});
83+
e.wait();
84+
} catch (sycl::exception const &e) {
85+
std::cout << "SYCL exception caught: " << e.what() << '\n';
86+
87+
delete[] A;
88+
delete[] B;
89+
delete[] C;
90+
return 1;
91+
}
92+
93+
int err_cnt = 0;
94+
95+
for (unsigned i = 0; i < Size; ++i) {
96+
if ((i % 2 == 0) && (A[i] + B[i] != C[i])) {
97+
if (++err_cnt < 10) {
98+
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
99+
<< " + " << B[i] << "\n";
100+
}
101+
} else if ((i % 2 == 1) && (C[i] != 1)) {
102+
if (++err_cnt < 10) {
103+
std::cout << "failed at index " << i << ", " << C[i] << " != 1\n";
104+
}
105+
}
106+
}
107+
if (err_cnt > 0) {
108+
std::cout << " pass rate: "
109+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
110+
<< (Size - err_cnt) << "/" << Size << ")\n";
111+
}
112+
113+
delete[] A;
114+
delete[] B;
115+
delete[] C;
116+
117+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
118+
return err_cnt > 0 ? 1 : 0;
119+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
//==---------------- asm_simd_view.cpp - DPC++ ESIMD on-device test
2+
//-------------==//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
// REQUIRES: gpu
10+
// UNSUPPORTED: cuda || hip
11+
// RUN: %clangxx -fsycl %s -o %t.out
12+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
13+
14+
#include "../esimd_test_utils.hpp"
15+
16+
#include <iostream>
17+
#include <sycl/ext/intel/esimd.hpp>
18+
#include <sycl/sycl.hpp>
19+
20+
using namespace sycl;
21+
22+
int main(void) {
23+
constexpr unsigned Size = 1024 * 128;
24+
constexpr unsigned VL = 16;
25+
26+
float *A = new float[Size];
27+
float *B = new float[Size];
28+
float *C = new float[Size];
29+
30+
for (unsigned i = 0; i < Size; ++i) {
31+
A[i] = B[i] = i;
32+
C[i] = 0.0f;
33+
}
34+
35+
try {
36+
buffer<float, 1> bufa(A, range<1>(Size));
37+
buffer<float, 1> bufb(B, range<1>(Size));
38+
buffer<float, 1> bufc(C, range<1>(Size));
39+
40+
// We need that many workgroups
41+
range<1> GlobalRange{Size / VL};
42+
43+
// We need that many threads in each group
44+
range<1> LocalRange{1};
45+
46+
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
47+
48+
auto dev = q.get_device();
49+
std::cout << "Running on " << dev.get_info<info::device::name>() << "\n";
50+
51+
auto e = q.submit([&](handler &cgh) {
52+
auto PA = bufa.get_access<access::mode::read>(cgh);
53+
auto PB = bufb.get_access<access::mode::read>(cgh);
54+
auto PC = bufc.get_access<access::mode::write>(cgh);
55+
cgh.parallel_for<class Test>(
56+
GlobalRange * LocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
57+
using namespace sycl::ext::intel::esimd;
58+
unsigned int offset = i * VL * sizeof(float);
59+
simd<float, VL> va;
60+
va.copy_from(PA, offset);
61+
simd<float, VL> vb;
62+
vb.copy_from(PB, offset);
63+
#ifdef __SYCL_DEVICE_ONLY__
64+
auto va_half1 = va.select<8, 1>();
65+
auto va_half2 = va.select<8, 1>(8);
66+
auto vb_half1 = vb.select<8, 1>();
67+
auto vb_half2 = vb.select<8, 1>(8);
68+
simd<float, VL / 2> out1;
69+
simd<float, VL / 2> out2;
70+
// simd_view is not supported in l-value context in inline asm, so
71+
// use simd to store the result
72+
__asm__("add (M1, 8) %0 %1 %2"
73+
: "=rw"(out1.data_ref())
74+
: "rw"(va_half1.data()), "rw"(vb_half1.data()));
75+
__asm__("add (M1, 8) %0 %1 %2"
76+
: "=rw"(out2.data_ref())
77+
: "rw"(va_half2.data()), "rw"(vb_half2.data()));
78+
out1.copy_to(PC, offset);
79+
out2.copy_to(PC, offset + ((VL / 2) * sizeof(float)));
80+
#else
81+
simd<float, VL> vc;
82+
vc = va+vb;
83+
vc.copy_to(PC, offset);
84+
#endif
85+
});
86+
});
87+
e.wait();
88+
} catch (sycl::exception const &e) {
89+
std::cout << "SYCL exception caught: " << e.what() << '\n';
90+
91+
delete[] A;
92+
delete[] B;
93+
delete[] C;
94+
return 1;
95+
}
96+
97+
int err_cnt = 0;
98+
99+
for (unsigned i = 0; i < Size; ++i) {
100+
if (A[i] + B[i] != C[i]) {
101+
if (++err_cnt < 10) {
102+
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
103+
<< " + " << B[i] << "\n";
104+
}
105+
}
106+
}
107+
if (err_cnt > 0) {
108+
std::cout << " pass rate: "
109+
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
110+
<< (Size - err_cnt) << "/" << Size << ")\n";
111+
}
112+
113+
delete[] A;
114+
delete[] B;
115+
delete[] C;
116+
117+
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
118+
return err_cnt > 0 ? 1 : 0;
119+
}

0 commit comments

Comments
 (0)