|
17 | 17 | #ifndef GPU_OCL_OCL_POST_OPS_H |
18 | 18 | #define GPU_OCL_OCL_POST_OPS_H |
19 | 19 |
|
20 | | -#ifndef SUB_GROUP_SIZE |
21 | | -#define SUB_GROUP_SIZE get_sub_group_size() |
22 | | -#endif |
23 | | - |
24 | 20 | #if WITH_POST_OP |
25 | 21 |
|
26 | 22 | #if !WITH_ELTWISE |
@@ -183,11 +179,11 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y, |
183 | 179 | } \ |
184 | 180 | } |
185 | 181 |
|
186 | | -#define X_NELEMS(x) ({ x / SUB_GROUP_SIZE; }) |
| 182 | +#define X_NELEMS(x) ({ x / get_sub_group_size(); }) |
187 | 183 |
|
188 | 184 | #define CONDITIONAL_FILL( \ |
189 | 185 | idx, blocked_coord, nelem, src_ptr, dst_ptr, data_type) \ |
190 | | - if (blocked_coord / SUB_GROUP_SIZE == nelem) \ |
| 186 | + if (blocked_coord / get_sub_group_size() == nelem) \ |
191 | 187 | FILL_WITH_BLOCK_READ(idx, src_ptr, dst_ptr, nelem, data_type); |
192 | 188 |
|
193 | 189 | #define FILL_BIN_ARG_TRY_BLOCK(idx, dest_ptr, dest_size, x0, x0_s, x1, x1_s, \ |
@@ -269,7 +265,7 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y, |
269 | 265 | REPLICATE_DATA(bin_arg_ptr, bin_arg_size, x0_s, X_NELEMS(x1_s), \ |
270 | 266 | x2_s, x3_s, x4_s, x5_s); \ |
271 | 267 | } else { \ |
272 | | - const unsigned x1_jump = is_burst ? SUB_GROUP_SIZE : 1; \ |
| 268 | + const unsigned x1_jump = is_burst ? get_sub_group_size() : 1; \ |
273 | 269 | const unsigned x1_size = x1_s / x1_jump; \ |
274 | 270 | FILL_BIN_ARG_SERIAL(idx, bin_arg_ptr, x0, x0_s, (x1 + x1_incr), \ |
275 | 271 | x1_s, x1_jump, x2, x2_s, x3, x3_s, x4, x4_s, x5, x5_s); \ |
|
0 commit comments