Skip to content

Commit 8ca511c

Browse files
authored
cuda : fix LLAMA_CUDA_F16 (ggml-org#5262)
1 parent d71ac90 commit 8ca511c

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(

         if (src1_convert_f16) {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
-            ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00,
-                                  ne00, 1, sizeof(float), 0, 0,
-                                  ne00, 1, sizeof(half), 0, 0, stream);
+            const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
+            GGML_ASSERT(to_fp16_cuda != nullptr);
+            to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
         }
 #else
         const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion

0 commit comments

Comments (0)