@@ -45,7 +45,6 @@ void compile_graph::run(program& p) {
     GPU_DEBUG_IF(debug_config->disable_onednn_permute_fusion == 1)
         disable_permute_fuse_onednn_gemm = true;
 
-
     for (size_t idx = 0; idx < proc_order.size(); idx++) {
         auto& node = *(std::next(proc_order.begin(), idx));
         const bool use_shape_agnostic_impl = !p.get_config().get_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape);
@@ -70,6 +69,14 @@ void compile_graph::run(program& p) {
                     change_initial_impl = false;
                 }
             }
+            if (node->is_type<convolution>()) {
+                auto w_layout = node->as<convolution>().weights().get_output_layout();
+                // convolution_fsv16_1x1 is the only shape-agnostic kernel available for an onednn convolution, which uses the blocked (fsv16) format.
+                // onednn convolution does not support input padding, while most of the optimized cldnn convolutions require it; fsv16_1x1 is the exception.
+                if (w_layout.spatial(0) != 1 || w_layout.spatial(1) != 1) {
+                    change_initial_impl = false;
+                }
+            }
         }
 
         if (change_initial_impl)
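The intent of the convolution branch above, as a minimal standalone sketch. The helper name and the plain bool/int parameters are illustrative only; in the pass the values come from w_layout.spatial(0)/spatial(1) and the existing change_initial_impl flag.

    // Sketch: a dynamic convolution that starts on an onednn impl is only switched
    // to the shape-agnostic OCL impl when its weights are spatially 1x1, because
    // convolution_b_fs_yx_fsv16_1x1 is the only blocked-format shape-agnostic kernel
    // and it needs no input padding (which the onednn path does not provide).
    bool keep_switch_to_shape_agnostic_ocl(bool change_initial_impl,
                                           int weights_spatial_x,
                                           int weights_spatial_y) {
        if (weights_spatial_x != 1 || weights_spatial_y != 1)
            return false;               // non-1x1 convolutions keep the onednn impl
        return change_initial_impl;
    }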
@@ -100,8 +107,10 @@ void compile_graph::run(program& p) {
 
         bool is_planar = format::is_default_format(node->get_output_layout().format);
 
-        if (node->is_dynamic() && !is_planar)
+        if ((node->is_dynamic() && !is_planar &&
+            (!node->is_type<convolution>() || (node->is_type<convolution>() && node->get_output_layout().format != cldnn::format::b_fs_yx_fsv16)))) {
             can_select_impl = false;
+        }
 
         if (node->is_type<condition>() || node->is_type<loop>() || node->is_type<proposal>())
             can_select_impl = true;
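The relaxed condition is easier to read with the outcome isolated; a hedged sketch with bool parameters standing in for the node queries used above (is_type<convolution>() and the output-format check).

    // Sketch: when compile-time impl selection is deferred for a node.
    bool defer_impl_selection(bool is_dynamic, bool is_planar,
                              bool is_convolution, bool is_fsv16_output) {
        if (!is_dynamic || is_planar)
            return false;  // static shapes and planar formats: select as before
        if (is_convolution && is_fsv16_output)
            return false;  // a dynamic fsv16 convolution is now selectable as well
        return true;       // other dynamic blocked layouts: defer to runtime
    }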
@@ -196,26 +196,30 @@ void prepare_primitive_fusing::fuse_bias(program &p) {
 
 
         if (node->get_output_layout().is_dynamic()) {
-            auto broadcast_type = eltw_node.get_primitive()->broadcast_spec.m_type;
-            if (!eltw_node.get_dependency(non_const_dep_idx).is_type<fully_connected>())
-                continue;
-            if (broadcast_type != ov::op::AutoBroadcastType::NUMPY && broadcast_type != ov::op::AutoBroadcastType::NONE)
-                continue;
-            // Numpy broadcast rule requires the dimension size which is not one to be same as the corresponding dimension of the other operand.
-            // So we can ensure that the feature size is same for this broadcasting rule, thereby being considered as bias.
-            auto const_shape = eltw_node.get_dependency(const_dep_idx).get_output_layout().get_shape();
-            int32_t count_elements_not_one = 0;
-            int32_t idx_element_not_one = -1;
-            for (size_t i = 0; i < const_shape.size(); ++i) {
-                if (const_shape[i] != 1) {
-                    count_elements_not_one++;
-                    idx_element_not_one = static_cast<int32_t>(i);
+            if (eltw_node.get_dependency(non_const_dep_idx).is_type<fully_connected>()) {
+                auto broadcast_type = eltw_node.get_primitive()->broadcast_spec.m_type;
+                if (broadcast_type != ov::op::AutoBroadcastType::NUMPY && broadcast_type != ov::op::AutoBroadcastType::NONE)
+                    continue;
+
+                // The Numpy broadcast rule requires every dimension that is not 1 to match the corresponding dimension of the other operand.
+                // Under this rule the feature size is therefore guaranteed to match, so the constant operand can be treated as a bias.
+                auto const_shape = eltw_node.get_dependency(const_dep_idx).get_output_layout().get_shape();
+                int32_t count_elements_not_one = 0;
+                int32_t idx_element_not_one = -1;
+                for (size_t i = 0; i < const_shape.size(); ++i) {
+                    if (const_shape[i] != 1) {
+                        count_elements_not_one++;
+                        idx_element_not_one = static_cast<int32_t>(i);
+                    }
+                    if (count_elements_not_one > 1)
+                        break;
                 }
-                if (count_elements_not_one > 1)
-                    break;
-            }
-            if (count_elements_not_one != 1 ||
-                (idx_element_not_one != (static_cast<int32_t>(const_shape.size()) - 1))) {
+
+                if (count_elements_not_one != 1 ||
+                    (idx_element_not_one != (static_cast<int32_t>(const_shape.size()) - 1))) {
+                    continue;
+                }
+            } else if (!eltw_node.get_dependency(non_const_dep_idx).is_type<convolution>()) {
                 continue;
             }
         } else {
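The shape test used by the fully_connected branch can be read as a small predicate; a sketch with worked examples (the function name and the std::vector signature are illustrative, the pass operates on the layout's shape object).

    #include <cstdint>
    #include <vector>

    // A constant operand qualifies as a bias only if exactly one of its dimensions
    // is larger than 1 and that dimension is the last (feature) axis.
    bool looks_like_fc_bias(const std::vector<int64_t>& const_shape) {
        int32_t count_elements_not_one = 0;
        int32_t idx_element_not_one = -1;
        for (size_t i = 0; i < const_shape.size(); ++i) {
            if (const_shape[i] != 1) {
                count_elements_not_one++;
                idx_element_not_one = static_cast<int32_t>(i);
            }
            if (count_elements_not_one > 1)
                break;
        }
        return count_elements_not_one == 1 &&
               idx_element_not_one == static_cast<int32_t>(const_shape.size()) - 1;
    }

    // looks_like_fc_bias({1, 1, 1, 64}) -> true:  per-feature vector on the last axis
    // looks_like_fc_bias({1, 64, 1, 1}) -> false: non-trailing axis, fusion is skipped
    // looks_like_fc_bias({1, 1, 8, 64}) -> false: more than one non-1 dimension

For the new convolution case the else-if above bypasses this test entirely and only requires the non-constant input to be a convolution node.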
src/plugins/intel_gpu/src/graph/impls/ocl/convolution.cpp (2 additions, 1 deletion)
@@ -356,7 +356,8 @@ attach_convolution_impl::attach_convolution_impl() {
     };
     auto dyn_formats = {
         format::bfyx,
-        format::bfzyx
+        format::bfzyx,
+        format::b_fs_yx_fsv16
     };
 
     implementation_map<convolution>::add(impl_types::ocl,
@@ -26,6 +26,7 @@
 REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
 __attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE * SLM_DIV_FACTOR, 1)))
 KERNEL(convolution_b_fs_yx_fsv16_1x1)(
+    OPTIONAL_SHAPE_INFO_ARG
     __global INPUT0_TYPE* input,
     __global OUTPUT_TYPE* output,
     __global FILTER_TYPE* weights
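OPTIONAL_SHAPE_INFO_ARG is what makes the kernel shape-agnostic: for dynamic builds it is expected to expand to an extra shape_info buffer argument carrying the runtime dimensions and pads, and to nothing for static builds. A hedged sketch of that expansion; the real macro bodies are generated by the JIT layer (jitter.cpp) and may differ.

    // Illustrative expansion only, under the assumption that IS_DYNAMIC marks
    // a shape-agnostic compilation of the kernel.
    #if IS_DYNAMIC
    #define OPTIONAL_SHAPE_INFO_ARG __global const int* shape_info,  // runtime dims and pads
    #else
    #define OPTIONAL_SHAPE_INFO_ARG                                  // dims are compiled in
    #endif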
src/plugins/intel_gpu/src/kernel_selector/jitter.cpp (2 additions, 1 deletion)
@@ -363,7 +363,8 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
     if (_tensor.is_dynamic()) {
         if (_tensor.GetLayout() == DataLayout::bf || _tensor.GetLayout() == DataLayout::bfyx ||
             _tensor.GetLayout() == DataLayout::bfzyx || _tensor.GetLayout() == DataLayout::bfwzyx ||
-            _tensor.GetLayout() == DataLayout::bfuwzyx || _tensor.GetLayout() == DataLayout::bfvuwzyx) {
+            _tensor.GetLayout() == DataLayout::bfuwzyx || _tensor.GetLayout() == DataLayout::bfvuwzyx ||
+            _tensor.GetLayout() == DataLayout::b_fs_yx_fsv16) {
             definitions.push_back({_name + "_X_PITCH", "1"});
             definitions.push_back({_name + "_Y_PITCH", dims_padded.x()});
             definitions.push_back({_name + "_Z_PITCH", toVectorMulString({dims_padded.x(), dims_padded.y()})});
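For a dynamic tensor this branch emits pitch macros instead of baked-in strides, so adding b_fs_yx_fsv16 lets the shape-agnostic fsv16_1x1 kernel derive offsets from the runtime shape. A plain C++ sketch of the pitch scheme those three definitions describe; the parameter names are placeholders for the dims_padded accessors, and the 16-wide feature blocking is presumably handled by the kernel's own indexing.

    // Illustrative values of the X/Y/Z pitch defines for a dynamic tensor,
    // mirroring the push_back calls above.
    struct Pitches { size_t x, y, z; };

    Pitches dynamic_pitches(size_t padded_x, size_t padded_y) {
        Pitches p;
        p.x = 1;                    // X_PITCH: innermost dimension is contiguous
        p.y = padded_x;             // Y_PITCH: step over one padded row
        p.z = padded_x * padded_y;  // Z_PITCH: step over one padded plane
        return p;
    }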