Skip to content

Commit dead8f4

Browse files
committed
Fix misaligned memory access in Q5_1 kernel
1 parent 72af259 commit dead8f4

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1433,7 +1433,9 @@ static __device__ __forceinline__ float vec_dot_q5_1_q8_1(
14331433
const block_q5_1 * bq5_1 = (const block_q5_1 *) vbq;
14341434

14351435
// TODO: fix misaligned access
1436-
const int qs = *((int *) &bq5_1->qs[sizeof(int) * (iqs + 0)]);
1436+
int qs;
1437+
memcpy(&qs, &bq5_1->qs[sizeof(int) * (iqs + 0)], sizeof(qs));
1438+
//const int qs = *((int *) &bq5_1->qs[sizeof(int) * (iqs + 0)]);
14371439
const int qh0 = bq5_1->qh[iqs/2 + 0] >> 4*(iqs%2);
14381440
const int qh1 = bq5_1->qh[iqs/2 + 2] >> 4*(iqs%2);
14391441
const int ui0 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + 0)]);

0 commit comments

Comments
 (0)