Skip to content

Commit 859bcc7

Browse files
committed
Remove mmap workaround on windows
After some testing I found that mmap is supported on windows and for many GPUs on Linux. Therefore I remove the workaround for windows since it is not necessary.
1 parent 2f91bc0 commit 859bcc7

File tree

1 file changed

+19
-35
lines changed

1 file changed

+19
-35
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 19 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -353,9 +353,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
353353
size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
354354

355355
if (padded_size > original_size && tensor->view_src == nullptr) {
356-
SYCL_CHECK(CHECK_TRY_ERROR(ctx->stream->memset(
357-
(char *)tensor->data + original_size, 0,
358-
padded_size - original_size)));
356+
SYCL_CHECK(CHECK_TRY_ERROR(
357+
ctx->stream->memset((char *) tensor->data + original_size, 0, padded_size - original_size)));
359358
}
360359
}
361360
return GGML_STATUS_SUCCESS;
@@ -374,16 +373,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
374373
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
375374
ggml_sycl_set_device(ctx->device);
376375
auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
377-
SYCL_CHECK(
378-
CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
376+
SYCL_CHECK(CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
377+
#ifndef _WIN32
379378
// Note: Use host buffer to save the data from mmap(), then copy to device. It's workaround for mmap() issue on PVC GPU.
380379
// This function will be called during load model from disk. Use memory buffer replace dynamic won't save more time and brings potential memory leak risk here.
381-
char* host_buf = (char*)malloc(size);
380+
char * host_buf = (char *) malloc(size);
382381
memcpy(host_buf, data, size);
383-
SYCL_CHECK(
384-
CHECK_TRY_ERROR((*stream).memcpy((char *)tensor->data + offset, host_buf, size)
385-
.wait()));
382+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, host_buf, size).wait()));
386383
free(host_buf);
384+
#else
385+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy((char *) tensor->data + offset, data, size).wait()));
386+
#endif
387387
}
388388
catch (sycl::exception const &exc) {
389389
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -487,9 +487,7 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
487487
SYCL_CHECK(
488488
CHECK_TRY_ERROR(dpct::get_current_device().queues_wait_and_throw()));
489489

490-
SYCL_CHECK(CHECK_TRY_ERROR((*stream)
491-
.memset(ctx->dev_ptr, value, buffer->size)
492-
));
490+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memset(ctx->dev_ptr, value, buffer->size)));
493491
}
494492
catch (sycl::exception const &exc) {
495493
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -829,10 +827,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
829827
the error codes. The original code was commented out and a warning
830828
string was inserted. You need to rewrite this code.
831829
*/
832-
SYCL_CHECK(CHECK_TRY_ERROR(
833-
(*stream)
834-
.memset(buf + original_size, 0, size - original_size)
835-
));
830+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memset(buf + original_size, 0, size - original_size)));
836831
}
837832

838833
extra->data_device[i] = buf;
@@ -897,10 +892,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
897892
*/
898893
ggml_sycl_set_device(i);
899894
const queue_ptr stream = ctx->streams[i];
900-
SYCL_CHECK(CHECK_TRY_ERROR(
901-
(*stream)
902-
.memcpy(extra->data_device[i], buf_host, original_size)
903-
));
895+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy(extra->data_device[i], buf_host, original_size)));
904896
}
905897
}
906898
catch (sycl::exception const &exc) {
@@ -950,10 +942,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
950942
*/
951943
ggml_sycl_set_device(i);
952944
const queue_ptr stream = ctx->streams[i];
953-
SYCL_CHECK(CHECK_TRY_ERROR(
954-
(*stream)
955-
.memcpy(buf_host, extra->data_device[i], original_size)
956-
));
945+
SYCL_CHECK(CHECK_TRY_ERROR((*stream).memcpy(buf_host, extra->data_device[i], original_size)));
957946
}
958947
}
959948
catch (sycl::exception const &exc) {
@@ -2483,10 +2472,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
24832472
if (i != ctx.device) {
24842473
if (convert_src1_to_q8_1) {
24852474
char * src1_ddq_i_source = dev[ctx.device].src1_ddq + src1_ddq_i_offset;
2486-
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
2487-
src1_ddq_i, src1_ddq_i_source,
2488-
src1_ncols * src1_padded_col_size * q8_1_ts /
2489-
q8_1_bs)));
2475+
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
2476+
src1_ddq_i, src1_ddq_i_source, src1_ncols * src1_padded_col_size * q8_1_ts / q8_1_bs)));
24902477
} else {
24912478

24922479
float * src1_ddf_i_source = (float *) src1_extra->data_device[ctx.device];
@@ -2551,9 +2538,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
25512538
float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3);
25522539
GGML_ASSERT(dst->nb[1] == ne0*sizeof(float));
25532540
dhf_dst_i += src1_col_0*ne0;
2554-
SYCL_CHECK(CHECK_TRY_ERROR(
2555-
stream->memcpy(dhf_dst_i, dst_dd_i,
2556-
src1_ncols * ne0 * sizeof(float))));
2541+
SYCL_CHECK(
2542+
CHECK_TRY_ERROR(stream->memcpy(dhf_dst_i, dst_dd_i, src1_ncols * ne0 * sizeof(float))));
25572543
}
25582544
}
25592545

@@ -3680,8 +3666,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
36803666

36813667
GGML_ASSERT(buf->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
36823668
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
3683-
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
3684-
data, (const char *)tensor->data + offset, size)));
3669+
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(data, (const char *) tensor->data + offset, size)));
36853670
}
36863671
catch (sycl::exception const &exc) {
36873672
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -3700,8 +3685,7 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
37003685
was inserted. You need to rewrite this code.
37013686
*/
37023687
const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
3703-
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
3704-
dst->data, src->data, ggml_nbytes(dst))));
3688+
SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(dst->data, src->data, ggml_nbytes(dst))));
37053689
return true;
37063690
}
37073691

0 commit comments

Comments
 (0)