Skip to content

Commit 02115dc

Browse files
authored
vulkan: Allow up to 4096 elements for mul_mat_id row_ids (#13326)
This assert fired when running Qwen_Qwen3-30B-A3B-Q2_K.gguf: GGML_ASSERT(nei0 * nei1 <= 3072); The ids tensor is 8 x 512 (4096 elements), which exceeds the old limit of 3072. Increase the row_ids array size to 4096 to accommodate it.
1 parent d9c4acc commit 02115dc

File tree

4 files changed

+5
-5
lines changed

4 files changed

+5
-5
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,7 +1632,7 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
16321632
const uint32_t warps = warptile[0] / warptile[10];
16331633

16341634
const uint32_t load_bufs = (warptile[1] + warptile[2]) * (warptile[3] + bank_conflict_offset) * type_size;
1635-
const uint32_t mmid_row_ids = mul_mat_id ? 3072 * sizeof(uint32_t) : 0;
1635+
const uint32_t mmid_row_ids = mul_mat_id ? 4096 * sizeof(uint32_t) : 0;
16361636
const uint32_t coopmat_stage = device->coopmat_support ? warptile[7] * warptile[8] / warps * sizeof(float) : 0;
16371637

16381638
const uint32_t total_size = load_bufs + mmid_row_ids + coopmat_stage + lut_size;
@@ -5260,7 +5260,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
52605260

52615261
const uint64_t nei0 = ids->ne[0];
52625262
const uint64_t nei1 = ids->ne[1];
5263-
GGML_ASSERT(nei0 * nei1 <= 3072);
5263+
GGML_ASSERT(nei0 * nei1 <= 4096);
52645264

52655265
const uint32_t nbi1 = ids->nb[1];
52665266
const uint32_t nbi2 = ids->nb[2];

ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ shared FLOAT_TYPE buf_a[BM * SHMEM_STRIDE];
103103
shared FLOAT_TYPE buf_b[BN * SHMEM_STRIDE];
104104

105105
#ifdef MUL_MAT_ID
106-
shared u16vec2 row_ids[3072];
106+
shared u16vec2 row_ids[4096];
107107
#endif // MUL_MAT_ID
108108

109109
#define NUM_WARPS (BLOCK_SIZE / WARP)

ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
9292
#ifdef MUL_MAT_ID
9393
layout (binding = 3) readonly buffer IDS {int data_ids[];};
9494

95-
shared u16vec4 row_ids[3072];
95+
shared u16vec4 row_ids[4096];
9696

9797
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufB {
9898
B_TYPE b[];

ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ shared FLOAT_TYPE_VEC2 buf_b_ds[BN];
101101
#define LOAD_VEC_B 4
102102

103103
#ifdef MUL_MAT_ID
104-
shared u16vec2 row_ids[3072];
104+
shared u16vec2 row_ids[4096];
105105
#endif // MUL_MAT_ID
106106

107107
#define NUM_WARPS (BLOCK_SIZE / WARP)

0 commit comments

Comments
 (0)