Skip to content

Commit 5cc640c

Browse files
author
Vladimir Paramuzov
committed
Fixes
1 parent d8a38b4 commit 5cc640c

File tree

9 files changed

+51
-31
lines changed

9 files changed

+51
-31
lines changed

src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ enum class LogLevel : int8_t {
4848
#else
4949
#define SEPARATE '/'
5050
#endif
51-
#define __FILENAME__ (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__)
51+
#define GPU_FILENAME (strrchr(__FILE__, SEPARATE) ? strrchr(__FILE__, SEPARATE) + 1 : __FILE__)
5252
#define GPU_DEBUG_IF(cond) if (cond)
5353
#define GPU_DEBUG_CODE(...) __VA_ARGS__
5454
#define GPU_DEBUG_DEFINE_MEM_LOGGER(stage) \
@@ -62,9 +62,9 @@ enum class LogLevel : int8_t {
6262
#define GPU_DEBUG_LOG_RAW_INT(min_verbose_level) if (cldnn::debug_configuration::get_instance()->verbose >= min_verbose_level) \
6363
((cldnn::debug_configuration::get_instance()->verbose_color == 0) ? GPU_DEBUG_LOG_PREFIX : GPU_DEBUG_LOG_COLOR_PREFIX)
6464
#define GPU_DEBUG_LOG_RAW(min_verbose_level) GPU_DEBUG_LOG_RAW_INT(static_cast<std::underlying_type<ov::intel_gpu::LogLevel>::type>(min_verbose_level))
65-
#define GPU_DEBUG_LOG_PREFIX std::cout << cldnn::debug_configuration::prefix << __FILENAME__ << ":" <<__LINE__ << ":" << __func__ << ": "
65+
#define GPU_DEBUG_LOG_PREFIX std::cout << cldnn::debug_configuration::prefix << GPU_FILENAME << ":" <<__LINE__ << ":" << __func__ << ": "
6666
#define GPU_DEBUG_LOG_COLOR_PREFIX std::cout << DARK_GRAY << cldnn::debug_configuration::prefix << \
67-
BLUE << __FILENAME__ << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET
67+
BLUE << GPU_FILENAME << ":" << PURPLE << __LINE__ << ":" << CYAN << __func__ << ": " << RESET
6868
#define DARK_GRAY "\033[1;30m"
6969
#define BLUE "\033[1;34m"
7070
#define PURPLE "\033[1;35m"

src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include "kernel_selector_params.h"
1010
#include "to_string_utils.h"
1111
#include "program_node.h"
12+
#include "intel_gpu/graph/serialization/layout_serializer.hpp"
13+
#include "intel_gpu/graph/serialization/polymorphic_serializer.hpp"
14+
#include "intel_gpu/graph/serialization/string_serializer.hpp"
15+
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
1216

1317
#include "intel_gpu/primitives/concatenation.hpp"
1418
#include "intel_gpu/primitives/convolution.hpp"

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
#define EXE_MODE_AGE_BASED "-cl-no-subgroup-ifp"
2020
#define EXE_MODE_NO_PRERA_SCH "-cl-intel-no-prera-scheduling"
2121

22+
namespace micro {
23+
struct MicroKernelPackage;
24+
} // namespace micro
25+
2226
namespace kernel_selector {
2327

2428
#ifndef UNUSED
@@ -64,6 +68,7 @@ struct KernelCode {
6468
struct clKernelData {
6569
KernelCode code;
6670
KernelParams params;
71+
std::vector<std::shared_ptr<micro::MicroKernelPackage>> micro_kernels;
6772
bool skip_execution = false;
6873
};
6974

src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ bool SDPAKernelMicro::Validate(const Params& p) const {
334334
return true;
335335
}
336336

337-
JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs) const {
337+
JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, const micro::Package& gemm_kq, const micro::Package& gemm_vs) const {
338338
auto jit = MakeBaseParamsJitConstants(params);
339339
const auto& prim_params = dynamic_cast<const sdpa_params&>(params);
340340

@@ -452,7 +452,7 @@ JitConstants SDPAKernelMicro::GetJitConstants(const sdpa_params& params, micro::
452452
return jit;
453453
}
454454

455-
CommonDispatchData SDPAKernelMicro::SetDefault(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs) const {
455+
CommonDispatchData SDPAKernelMicro::SetDefault(const sdpa_params& params, const micro::Package& gemm_kq, const micro::Package& gemm_vs) const {
456456
CommonDispatchData dispatch_data;
457457

458458
auto wg_tile_q = gemm_kq.getSetting("wg_tile_n");
@@ -468,12 +468,14 @@ CommonDispatchData SDPAKernelMicro::SetDefault(const sdpa_params& params, micro:
468468
return dispatch_data;
469469
}
470470

471-
clKernelData SDPAKernelMicro::get_kernel_data(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs, bool is_prefill) const {
471+
clKernelData SDPAKernelMicro::get_kernel_data(const sdpa_params& params, bool is_prefill) const {
472472
auto name = kernelName + (is_prefill ? "_prefill" : "_generate");
473-
init_microkernels(params, gemm_kq, gemm_vs, is_prefill);
474-
auto dispatch_data = SetDefault(params, gemm_kq, gemm_vs);
473+
474+
std::vector<micro::Package> gemms(2); // KQ and VS
475+
init_microkernels(params, gemms[kq_id], gemms[vs_id], is_prefill);
476+
auto dispatch_data = SetDefault(params, gemms[kq_id], gemms[vs_id]);
475477
auto entry_point = GetEntryPoint(name, params.layerID, params);
476-
auto jit = CreateJit(name, GetJitConstants(params, gemm_kq, gemm_vs), entry_point);
478+
auto jit = CreateJit(name, GetJitConstants(params, gemms[kq_id], gemms[vs_id]), entry_point);
477479
clKernelData kernel;
478480

479481
FillCLKernelData(kernel, dispatch_data, params.engineInfo, kernelName, jit, entry_point,
@@ -519,13 +521,13 @@ clKernelData SDPAKernelMicro::get_kernel_data(const sdpa_params& params, micro::
519521
shim_options.useTileOps = true;
520522
shim_options.decorator = "kq";
521523

522-
kernel.code.kernelString->jit += generateShim(gemm_kq, micro::HostLanguage::OpenCL_C, shim_options);
524+
kernel.code.kernelString->jit += generateShim(gemms[kq_id], micro::HostLanguage::OpenCL_C, shim_options);
523525

524526
shim_options.microkernelID++;
525527
shim_options.decorator = "vs";
526-
kernel.code.kernelString->jit += generateShim(gemm_vs, micro::HostLanguage::OpenCL_C, shim_options);
528+
kernel.code.kernelString->jit += generateShim(gemms[vs_id], micro::HostLanguage::OpenCL_C, shim_options);
527529

528-
if (gemm_kq.grfMin > 128 || gemm_vs.grfMin > 128)
530+
if (gemms[kq_id].grfMin > 128 || gemms[vs_id].grfMin > 128)
529531
kernel.code.kernelString->options += " -cl-intel-256-GRF-per-thread";
530532

531533
std::string extra_options = " -Dcl_intel_dot_accumulate";
@@ -537,6 +539,10 @@ clKernelData SDPAKernelMicro::get_kernel_data(const sdpa_params& params, micro::
537539
kernel.code.kernelString->batch_compilation = false;
538540
kernel.code.kernelString->has_microkernels = true;
539541

542+
for (auto& p : gemms) {
543+
kernel.micro_kernels.push_back(std::make_shared<micro::MicroKernelPackage>(p));
544+
}
545+
540546
return kernel;
541547
}
542548

@@ -549,11 +555,8 @@ KernelsData SDPAKernelMicro::GetKernelsData(const Params& params) const {
549555
return {};
550556
}
551557

552-
gemms_kq.resize(2);
553-
gemms_vs.resize(2);
554-
555558
for (size_t i = 0; i < num_kernels; i++) {
556-
kd.kernels[i] = get_kernel_data(prim_params, gemms_kq[i], gemms_vs[i], i == prefill_id);
559+
kd.kernels[i] = get_kernel_data(prim_params, i == prefill_id);
557560
}
558561

559562
GetUpdateDispatchDataFunc(kd);
@@ -594,7 +597,8 @@ void SDPAKernelMicro::GetUpdateDispatchDataFunc(KernelData& kd) const {
594597
kernel_data.kernels[prefill_id].skip_execution = true;
595598
kernel_data.kernels[generate_id].skip_execution = true;
596599

597-
auto dispatchData = SetDefault(prim_params, gemms_kq[target_kernel], gemms_vs[target_kernel]);
600+
const auto& gemms = kernel_data.kernels[target_kernel].micro_kernels;
601+
auto dispatchData = SetDefault(prim_params, gemms[kq_id]->p, gemms[vs_id]->p);
598602
kernel_data.kernels[target_kernel].params.workGroups.global = dispatchData.gws;
599603
kernel_data.kernels[target_kernel].params.workGroups.local = dispatchData.lws;
600604
kernel_data.kernels[target_kernel].skip_execution = KernelData::SkipKernelExecution(prim_params);

src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_micro.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,22 @@ class SDPAKernelMicro : public SDPAKernelBase {
2121
protected:
2222
bool Validate(const Params& p) const override;
2323
void GetUpdateDispatchDataFunc(KernelData& kd) const override;
24-
CommonDispatchData SetDefault(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs) const;
25-
JitConstants GetJitConstants(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs) const;
24+
CommonDispatchData SetDefault(const sdpa_params& params, const micro::Package& gemm_kq, const micro::Package& gemm_vs) const;
25+
JitConstants GetJitConstants(const sdpa_params& params, const micro::Package& gemm_kq, const micro::Package& gemm_vs) const;
2626
std::vector<FusedOpType> GetSupportedFusedOps() const override {
2727
return {};
2828
}
2929

3030
void init_microkernels(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs, bool is_prefill) const;
31-
clKernelData get_kernel_data(const sdpa_params& params, micro::Package& gemm_kq, micro::Package& gemm_vs, bool is_prefill) const;
31+
clKernelData get_kernel_data(const sdpa_params& params, bool is_prefill) const;
3232

3333
private:
34-
mutable std::vector<micro::Package> gemms_kq;
35-
mutable std::vector<micro::Package> gemms_vs;
36-
3734
static constexpr size_t prefill_id = 0;
3835
static constexpr size_t generate_id = 1;
36+
37+
static constexpr size_t kq_id = 0;
38+
static constexpr size_t vs_id = 1;
39+
3940
static std::mutex m;
4041
};
4142
} // namespace kernel_selector

src/plugins/intel_gpu/src/kernel_selector/micro_utils.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,11 @@
1010
# undef UNUSED
1111
#endif
1212

13-
#ifdef __FILENAME__
14-
# undef __FILENAME__
15-
#endif
16-
1713
#ifndef NOMINMAX
1814
# define NOMINMAX
1915
#endif
2016

2117
#include "gpu/intel/microkernels/package.hpp"
22-
#include "gpu/intel/jit/jit_generator.hpp"
2318
#include "gpu/intel/jit/gemm/microkernel_provider.hpp"
2419
#include "gpu/intel/jit/gemm/gen_gemm_kernel_generator.hpp"
2520
#include "gpu/intel/microkernels/shim.hpp"
@@ -38,6 +33,14 @@ namespace micro {
3833
using ShimOptions = dnnl::impl::gpu::intel::micro::ShimOptions;
3934
using HostLanguage = dnnl::impl::gpu::intel::micro::HostLanguage;
4035

36+
// Wrapper around Package that is used in clKernelData via a forward declaration
37+
// to avoid including this header in many places in the plugin,
38+
// which may cause symbol conflicts with oneDNN
39+
struct MicroKernelPackage {
40+
explicit MicroKernelPackage(Package _p) : p(_p) {}
41+
Package p;
42+
};
43+
4144
inline Package select_gemm_microkernel(GEMMProtocol protocol, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
4245
const std::vector<StrategyRequirement> &reqs = std::vector<StrategyRequirement>(),
4346
void (*strategyAdjuster)(GEMMStrategy &strategy) = nullptr) {
@@ -50,7 +53,6 @@ namespace micro {
5053

5154
} // namespace micro
5255

53-
#undef __FILENAME__
5456
#undef UNUSED
5557

5658
#endif // ENABLE_ONEDNN_FOR_GPU

src/plugins/intel_gpu/src/runtime/kernels_cache.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
#include "ocl/ocl_kernel.hpp"
88
#include "ocl/ocl_engine.hpp"
99
#include "ocl/ocl_common.hpp"
10+
#include "intel_gpu/graph/serialization/set_serializer.hpp"
11+
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
12+
#include "intel_gpu/graph/serialization/map_serializer.hpp"
13+
#include "intel_gpu/graph/serialization/string_serializer.hpp"
1014
#include "intel_gpu/runtime/debug_configuration.hpp"
1115
#include "intel_gpu/runtime/itt.hpp"
1216
#include "intel_gpu/runtime/file_util.hpp"

src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

5-
// Include jit_generator first to resolve __FILENAME__ macro conflict with debug_configuration.hpp
65
#ifdef ENABLE_ONEDNN_FOR_GPU
76
#ifndef NOMINMAX
87
# define NOMINMAX
98
#endif
109
#include "gpu/intel/jit/jit_generator.hpp"
11-
#undef __FILENAME__
1210
#endif
1311

1412
#include "ocl_device.hpp"

src/plugins/intel_gpu/src/runtime/ocl/ocl_wrapper.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,15 @@
5050
#pragma clang diagnostic ignored "-Wunused-variable"
5151
#pragma clang diagnostic ignored "-Wunused-function"
5252
#pragma clang diagnostic ignored "-Wignored-qualifiers"
53+
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
5354
#elif defined __GNUC__
5455
#pragma GCC diagnostic push
5556
#pragma GCC diagnostic ignored "-Wsign-compare"
5657
#pragma GCC diagnostic ignored "-Wunused-parameter"
5758
#pragma GCC diagnostic ignored "-Wunused-variable"
5859
#pragma GCC diagnostic ignored "-Wunused-function"
5960
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
61+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
6062
#if __GNUC__ >= 8
6163
#pragma GCC diagnostic ignored "-Wcatch-value"
6264
#endif

0 commit comments

Comments
 (0)