Skip to content

Commit f864fa4

Browse files
committed
Revert "Revert "cuda : fix tensor size calculation for non-split buffer (ggml-org#5145)""
This reverts commit 3d83ce9.
1 parent 3d83ce9 commit f864fa4

File tree

2 files changed

+8
-15
lines changed

2 files changed

+8
-15
lines changed

ggml-backend.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
3838
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
3939
// get_alloc_size is optional, defaults to ggml_nbytes
4040
if (buft->iface.get_alloc_size) {
41-
return buft->iface.get_alloc_size(buft, tensor);
41+
size_t size = buft->iface.get_alloc_size(buft, tensor);
42+
assert(size >= ggml_nbytes(tensor));
43+
return size;
4244
}
4345
return ggml_nbytes(tensor);
4446
}

ggml-cuda.cu

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9991,8 +9991,8 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
99919991
// TODO: mmq/mmv support
99929992
#endif
99939993

9994-
const int64_t nb11 = src1->nb[1];
9995-
const int64_t nb1 = dst->nb[1];
9994+
const size_t nb11 = src1->nb[1];
9995+
const size_t nb1 = dst->nb[1];
99969996

99979997
const struct ggml_tensor * ids = src0;
99989998
const int32_t id = ((int32_t *) dst->op_params)[0];
@@ -10517,15 +10517,11 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t
1051710517

1051810518
if (ggml_is_quantized(tensor->type)) {
1051910519
// initialize padding to 0 to avoid possible NaN values
10520-
int64_t row_low = 0;
10521-
int64_t row_high = ggml_nrows(tensor);
10522-
int64_t nrows_split = row_high - row_low;
10523-
10524-
size_t original_size = ggml_nbytes_split(tensor, nrows_split);
10520+
size_t original_size = ggml_nbytes(tensor);
1052510521
size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
1052610522

1052710523
if (padded_size > original_size && tensor->view_src == nullptr) {
10528-
CUDA_CHECK(cudaMemsetAsync((char *)tensor->data + original_size, 0, padded_size - original_size, g_cudaStreams[ctx->device][0]));
10524+
CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size));
1052910525
}
1053010526
}
1053110527
}
@@ -10628,12 +10624,7 @@ GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend
1062810624
}
1062910625

1063010626
GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
10631-
int64_t row_low = 0;
10632-
int64_t row_high = ggml_nrows(tensor);
10633-
int64_t nrows_split = row_high - row_low;
10634-
10635-
size_t size = ggml_nbytes_split(tensor, nrows_split);
10636-
10627+
size_t size = ggml_nbytes(tensor);
1063710628
int64_t ne0 = tensor->ne[0];
1063810629

1063910630
if (ggml_is_quantized(tensor->type)) {

0 commit comments

Comments (0)