Skip to content

Commit 44e199d

Browse files
committed
sycl : Fixed mmvq being called when reorder is disabled
1 parent ae199a7 commit 44e199d

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2993,12 +2993,15 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
2993 2993
use_mul_mat_q = use_mul_mat_q && (src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
2994 2994
#endif // SYCL_USE_XMX
2995 2995

2996+
const bool reorder = static_cast<ggml_tensor_extra_gpu *>(dst->src[0]->extra) &&
2997+
static_cast<ggml_tensor_extra_gpu *>(dst->src[0]->extra)->optimized_feature.reorder;
2998+
2996 2999
// mmvq path is faster in the CUDA backend.
2997 3000
if (ctx.stream()->get_backend() == sycl::backend::ext_oneapi_cuda
2998 3001
// Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization
2999 3002
// is enabled takes precedence over DMMV, the current if-else implementation
3000 3003
// requires disabling DMMV if both conditions are met
3001-
|| (ctx.opt_feature.reorder && ggml_sycl_supports_reorder_mmvq(src0->type))) {
3004+
|| (reorder && ggml_sycl_supports_reorder_mmvq(src0->type))) {
3002 3005
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
3003 3006
}
3004 3007

0 commit comments

Comments
 (0)