Skip to content

Commit a7c8cbc

Browse files
rjoursler authored and vpirogov committed
gpu: ocl: rely on get_sub_group_size() for post_ops
Avoids the possibility that the SUB_GROUP_SIZE define differs from the sub-group size of the kernel that is actually being compiled.
1 parent 44355a6 commit a7c8cbc

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

src/gpu/ocl/ocl_post_ops.h

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,6 @@
1717
#ifndef GPU_OCL_OCL_POST_OPS_H
1818
#define GPU_OCL_OCL_POST_OPS_H
1919

20-
#ifndef SUB_GROUP_SIZE
21-
#define SUB_GROUP_SIZE get_sub_group_size()
22-
#endif
23-
2420
#if WITH_POST_OP
2521

2622
#if !WITH_ELTWISE
@@ -183,11 +179,11 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y,
183179
} \
184180
}
185181

186-
#define X_NELEMS(x) ({ x / SUB_GROUP_SIZE; })
182+
#define X_NELEMS(x) ({ x / get_sub_group_size(); })
187183

188184
#define CONDITIONAL_FILL( \
189185
idx, blocked_coord, nelem, src_ptr, dst_ptr, data_type) \
190-
if (blocked_coord / SUB_GROUP_SIZE == nelem) \
186+
if (blocked_coord / get_sub_group_size() == nelem) \
191187
FILL_WITH_BLOCK_READ(idx, src_ptr, dst_ptr, nelem, data_type);
192188

193189
#define FILL_BIN_ARG_TRY_BLOCK(idx, dest_ptr, dest_size, x0, x0_s, x1, x1_s, \
@@ -269,7 +265,7 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y,
269265
REPLICATE_DATA(bin_arg_ptr, bin_arg_size, x0_s, X_NELEMS(x1_s), \
270266
x2_s, x3_s, x4_s, x5_s); \
271267
} else { \
272-
const unsigned x1_jump = is_burst ? SUB_GROUP_SIZE : 1; \
268+
const unsigned x1_jump = is_burst ? get_sub_group_size() : 1; \
273269
const unsigned x1_size = x1_s / x1_jump; \
274270
FILL_BIN_ARG_SERIAL(idx, bin_arg_ptr, x0, x0_s, (x1 + x1_incr), \
275271
x1_s, x1_jump, x2, x2_s, x3, x3_s, x4, x4_s, x5, x5_s); \

0 commit comments

Comments
 (0)