@@ -353,9 +353,8 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
353
353
size_t padded_size = ggml_backend_buft_get_alloc_size (buffer->buft , tensor);
354
354
355
355
if (padded_size > original_size && tensor->view_src == nullptr ) {
356
- SYCL_CHECK (CHECK_TRY_ERROR (ctx->stream ->memset (
357
- (char *)tensor->data + original_size, 0 ,
358
- padded_size - original_size)));
356
+ SYCL_CHECK (CHECK_TRY_ERROR (
357
+ ctx->stream ->memset ((char *) tensor->data + original_size, 0 , padded_size - original_size)));
359
358
}
360
359
}
361
360
return GGML_STATUS_SUCCESS;
@@ -374,16 +373,17 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
374
373
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context ;
375
374
ggml_sycl_set_device (ctx->device );
376
375
auto stream = &(dpct::dev_mgr::instance ().get_device (ctx->device ).default_queue ());
377
- SYCL_CHECK (
378
- CHECK_TRY_ERROR ( dpct::dev_mgr::instance (). get_device (ctx-> device ). queues_wait_and_throw ()));
376
+ SYCL_CHECK (CHECK_TRY_ERROR ( dpct::dev_mgr::instance (). get_device (ctx-> device ). queues_wait_and_throw ()));
377
+ # ifndef _WIN32
379
378
// Note: Copy the data from mmap() into a host buffer first, then copy it to the device. This is a workaround for an mmap() issue on PVC GPUs.
380
379
// This function is called while loading a model from disk. Replacing the dynamic allocation with a persistent memory buffer would not save time and would introduce a potential memory-leak risk here.
381
- char * host_buf = (char *) malloc (size);
380
+ char * host_buf = (char *) malloc (size);
382
381
memcpy (host_buf, data, size);
383
- SYCL_CHECK (
384
- CHECK_TRY_ERROR ((*stream).memcpy ((char *)tensor->data + offset, host_buf, size)
385
- .wait ()));
382
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy ((char *) tensor->data + offset, host_buf, size).wait ()));
386
383
free (host_buf);
384
+ #else
385
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy ((char *) tensor->data + offset, data, size).wait ()));
386
+ #endif
387
387
}
388
388
catch (sycl::exception const &exc) {
389
389
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -487,9 +487,7 @@ static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
487
487
SYCL_CHECK (
488
488
CHECK_TRY_ERROR (dpct::get_current_device ().queues_wait_and_throw ()));
489
489
490
- SYCL_CHECK (CHECK_TRY_ERROR ((*stream)
491
- .memset (ctx->dev_ptr , value, buffer->size )
492
- ));
490
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memset (ctx->dev_ptr , value, buffer->size )));
493
491
}
494
492
catch (sycl::exception const &exc) {
495
493
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -829,10 +827,7 @@ ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
829
827
the error codes. The original code was commented out and a warning
830
828
string was inserted. You need to rewrite this code.
831
829
*/
832
- SYCL_CHECK (CHECK_TRY_ERROR (
833
- (*stream)
834
- .memset (buf + original_size, 0 , size - original_size)
835
- ));
830
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memset (buf + original_size, 0 , size - original_size)));
836
831
}
837
832
838
833
extra->data_device [i] = buf;
@@ -897,10 +892,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
897
892
*/
898
893
ggml_sycl_set_device (i);
899
894
const queue_ptr stream = ctx->streams [i];
900
- SYCL_CHECK (CHECK_TRY_ERROR (
901
- (*stream)
902
- .memcpy (extra->data_device [i], buf_host, original_size)
903
- ));
895
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy (extra->data_device [i], buf_host, original_size)));
904
896
}
905
897
}
906
898
catch (sycl::exception const &exc) {
@@ -950,10 +942,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
950
942
*/
951
943
ggml_sycl_set_device (i);
952
944
const queue_ptr stream = ctx->streams [i];
953
- SYCL_CHECK (CHECK_TRY_ERROR (
954
- (*stream)
955
- .memcpy (buf_host, extra->data_device [i], original_size)
956
- ));
945
+ SYCL_CHECK (CHECK_TRY_ERROR ((*stream).memcpy (buf_host, extra->data_device [i], original_size)));
957
946
}
958
947
}
959
948
catch (sycl::exception const &exc) {
@@ -2483,10 +2472,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
2483
2472
if (i != ctx.device ) {
2484
2473
if (convert_src1_to_q8_1) {
2485
2474
char * src1_ddq_i_source = dev[ctx.device ].src1_ddq + src1_ddq_i_offset;
2486
- SYCL_CHECK (CHECK_TRY_ERROR (stream->memcpy (
2487
- src1_ddq_i, src1_ddq_i_source,
2488
- src1_ncols * src1_padded_col_size * q8_1_ts /
2489
- q8_1_bs)));
2475
+ SYCL_CHECK (CHECK_TRY_ERROR (stream->memcpy (
2476
+ src1_ddq_i, src1_ddq_i_source, src1_ncols * src1_padded_col_size * q8_1_ts / q8_1_bs)));
2490
2477
} else {
2491
2478
2492
2479
float * src1_ddf_i_source = (float *) src1_extra->data_device [ctx.device ];
@@ -2551,9 +2538,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
2551
2538
float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3);
2552
2539
GGML_ASSERT (dst->nb [1 ] == ne0*sizeof (float ));
2553
2540
dhf_dst_i += src1_col_0*ne0;
2554
- SYCL_CHECK (CHECK_TRY_ERROR (
2555
- stream->memcpy (dhf_dst_i, dst_dd_i,
2556
- src1_ncols * ne0 * sizeof (float ))));
2541
+ SYCL_CHECK (
2542
+ CHECK_TRY_ERROR (stream->memcpy (dhf_dst_i, dst_dd_i, src1_ncols * ne0 * sizeof (float ))));
2557
2543
}
2558
2544
}
2559
2545
@@ -3680,8 +3666,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
3680
3666
3681
3667
GGML_ASSERT (buf->buft == ggml_backend_sycl_buffer_type (sycl_ctx->device ) && " unsupported buffer type" );
3682
3668
const queue_ptr stream = sycl_ctx->stream (sycl_ctx->device , 0 );
3683
- SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (
3684
- data, (const char *)tensor->data + offset, size)));
3669
+ SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (data, (const char *) tensor->data + offset, size)));
3685
3670
}
3686
3671
catch (sycl::exception const &exc) {
3687
3672
std::cerr << exc.what () << " Exception caught at file:" << __FILE__
@@ -3700,8 +3685,7 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
3700
3685
was inserted. You need to rewrite this code.
3701
3686
*/
3702
3687
const queue_ptr stream = sycl_ctx->stream (sycl_ctx->device , 0 );
3703
- SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (
3704
- dst->data , src->data , ggml_nbytes (dst))));
3688
+ SYCL_CHECK (CHECK_TRY_ERROR ((stream)->memcpy (dst->data , src->data , ggml_nbytes (dst))));
3705
3689
return true ;
3706
3690
}
3707
3691
0 commit comments