From a24d367ce24a8e9d762efe10ff8e52ce1ac48acf Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 25 Mar 2025 04:03:34 -0700 Subject: [PATCH 01/37] Initial impl of dpnp.inter() --- dpnp/CMakeLists.txt | 1 + dpnp/backend/extensions/math/CMakeLists.txt | 87 +++++ dpnp/backend/extensions/math/common.cpp | 75 ++++ dpnp/backend/extensions/math/common.hpp | 93 +++++ .../extensions/math/dispatch_table.hpp | 292 +++++++++++++++ dpnp/backend/extensions/math/interpolate.cpp | 338 ++++++++++++++++++ dpnp/backend/extensions/math/interpolate.hpp | 66 ++++ dpnp/backend/extensions/math/math_py.cpp | 37 ++ dpnp/dpnp_iface_statistics.py | 175 +++++++++ 9 files changed, 1164 insertions(+) create mode 100644 dpnp/backend/extensions/math/CMakeLists.txt create mode 100644 dpnp/backend/extensions/math/common.cpp create mode 100644 dpnp/backend/extensions/math/common.hpp create mode 100644 dpnp/backend/extensions/math/dispatch_table.hpp create mode 100644 dpnp/backend/extensions/math/interpolate.cpp create mode 100644 dpnp/backend/extensions/math/interpolate.hpp create mode 100644 dpnp/backend/extensions/math/math_py.cpp diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 6be90d849dc4..6c59141bd1d9 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -60,6 +60,7 @@ add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/fft) add_subdirectory(backend/extensions/indexing) add_subdirectory(backend/extensions/lapack) +add_subdirectory(backend/extensions/math) add_subdirectory(backend/extensions/statistics) add_subdirectory(backend/extensions/ufunc) add_subdirectory(backend/extensions/vm) diff --git a/dpnp/backend/extensions/math/CMakeLists.txt b/dpnp/backend/extensions/math/CMakeLists.txt new file mode 100644 index 000000000000..fed91cfd3f9a --- /dev/null +++ b/dpnp/backend/extensions/math/CMakeLists.txt @@ -0,0 +1,87 @@ +# ***************************************************************************** +# Copyright (c) 2016-2025, Intel 
Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
# Build script for the dpnp.backend.extensions.math._math_impl pybind11 module.

set(python_module_name _math_impl)
set(_module_src
    ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/math_py.cpp
)

pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})

if(_dpnp_sycl_targets)
    # make fat binary
    target_compile_options(
        ${python_module_name}
        PRIVATE
        -fsycl-targets=${_dpnp_sycl_targets}
    )
    target_link_options(
        ${python_module_name}
        PRIVATE
        -fsycl-targets=${_dpnp_sycl_targets}
    )
endif()

if (WIN32)
    if (${CMAKE_VERSION} VERSION_LESS "3.27")
        # This is a work-around for target_link_options inserting the option
        # after the -link option, which causes the linker to ignore it.
        set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel")
    endif()
endif()

# POSITION_INDEPENDENT_CODE is the target property; CMAKE_POSITION_INDEPENDENT_CODE
# is only the variable that seeds it and has no effect when set as a property.
set_target_properties(${python_module_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src)

target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR})
target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})

# Keep NaN/Inf handling and exact math functions enabled in device code.
if (WIN32)
    target_compile_options(${python_module_name} PRIVATE
        /clang:-fno-approx-func
        /clang:-fno-finite-math-only
    )
else()
    target_compile_options(${python_module_name} PRIVATE
        -fno-approx-func
        -fno-finite-math-only
    )
endif()

target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)

if (DPNP_GENERATE_COVERAGE)
    target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
endif()

install(TARGETS ${python_module_name}
    DESTINATION "dpnp/backend/extensions/math"
)
a/dpnp/backend/extensions/math/common.cpp b/dpnp/backend/extensions/math/common.cpp new file mode 100644 index 000000000000..93723e838b21 --- /dev/null +++ b/dpnp/backend/extensions/math/common.cpp @@ -0,0 +1,75 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include "common.hpp" +#include "utils/type_dispatch.hpp" +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace math +{ +namespace common +{ +pybind11::dtype dtype_from_typenum(int dst_typenum) +{ + dpctl_td_ns::typenum_t dst_typenum_t = + static_cast(dst_typenum); + switch (dst_typenum_t) { + case dpctl_td_ns::typenum_t::BOOL: + return py::dtype("?"); + case dpctl_td_ns::typenum_t::INT8: + return py::dtype("i1"); + case dpctl_td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case dpctl_td_ns::typenum_t::INT16: + return py::dtype("i2"); + case dpctl_td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case dpctl_td_ns::typenum_t::INT32: + return py::dtype("i4"); + case dpctl_td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case dpctl_td_ns::typenum_t::INT64: + return py::dtype("i8"); + case dpctl_td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case dpctl_td_ns::typenum_t::HALF: + return py::dtype("f2"); + case dpctl_td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case dpctl_td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case dpctl_td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case dpctl_td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +} // namespace common +} // namespace math diff --git a/dpnp/backend/extensions/math/common.hpp b/dpnp/backend/extensions/math/common.hpp new file mode 100644 index 000000000000..1d436440c392 --- /dev/null +++ b/dpnp/backend/extensions/math/common.hpp @@ -0,0 +1,93 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include +#include + +// clang-format off +// math_utils.hpp doesn't include sycl header but uses sycl types +// so sycl.hpp must be included before math_utils.hpp +#include +#include "utils/math_utils.hpp" +// clang-format on + +namespace math +{ +namespace common +{ +template +struct Less +{ + bool operator()(const T &lhs, const T &rhs) const + { + return std::less{}(lhs, rhs); + } +}; + +template +struct Less> +{ + bool operator()(const std::complex &lhs, + const std::complex &rhs) const + { + return dpctl::tensor::math_utils::less_complex(lhs, rhs); + } +}; + +template +struct IsNan +{ + static bool isnan(const T &v) + { + if constexpr (std::is_floating_point_v || + std::is_same_v) { + return sycl::isnan(v); + } + + return false; + } +}; + +template +struct IsNan> +{ + static bool isnan(const std::complex &v) + { + T real1 = std::real(v); + T imag1 = std::imag(v); + return sycl::isnan(real1) || sycl::isnan(imag1); + } +}; + + +// This function is a copy from dpctl because it is not available in the public +// headers of dpctl. +pybind11::dtype dtype_from_typenum(int dst_typenum); + +} // namespace common +} // namespace math diff --git a/dpnp/backend/extensions/math/dispatch_table.hpp b/dpnp/backend/extensions/math/dispatch_table.hpp new file mode 100644 index 000000000000..4cfd3d2a09a4 --- /dev/null +++ b/dpnp/backend/extensions/math/dispatch_table.hpp @@ -0,0 +1,292 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include "utils/type_dispatch.hpp" +#include +#include +#include +#include + +#include "common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace py = pybind11; + +namespace math +{ +namespace common +{ + +template +struct one_of +{ + static_assert(std::is_same_v>, + "one_of: second parameter cannot be empty std::tuple"); + static_assert(false, "one_of: second parameter must be std::tuple"); +}; + +template +struct one_of> +{ + static constexpr bool value = + std::is_same_v || one_of>::value; +}; + +template +struct one_of> +{ + static constexpr bool value = std::is_same_v; +}; + +template +constexpr bool one_of_v = one_of::value; + +template +using Table = FnT[dpctl_td_ns::num_types]; +template +using Table2 = Table[dpctl_td_ns::num_types]; + +using TypeId = int32_t; +using TypesPair = std::pair; + +struct int_pair_hash +{ + inline size_t operator()(const TypesPair &p) const + { + std::hash hasher; + return hasher(size_t(p.first) << (8 * sizeof(TypeId)) | + size_t(p.second)); + } +}; + +using SupportedTypesList = std::vector; +using SupportedTypesList2 = std::vector; +using SupportedTypesSet = std::unordered_set; +using SupportedTypesSet2 = std::unordered_set; + +using DType = py::dtype; +using DTypePair = std::pair; + +using SupportedDTypeList = std::vector; +using SupportedDTypeList2 = std::vector; + +template + typename Func> +struct TableBuilder2 +{ + template + struct impl + { + static constexpr bool is_defined = + one_of_v, SupportedTypes>; + + _FnT get() + { + if constexpr (is_defined) { + return Func::impl; + } + else { + return nullptr; + } + } + }; + + using type = + dpctl_td_ns::DispatchTableBuilder; +}; + +template +class DispatchTable2 +{ +public: + DispatchTable2(std::string first_name, std::string second_name) + : first_name(first_name), second_name(second_name) + { + } + + template + typename Func> + void 
populate_dispatch_table() + { + using TBulder = typename TableBuilder2::type; + TBulder builder; + + builder.populate_dispatch_table(table); + populate_supported_types(); + } + + FnT get_unsafe(int first_typenum, int second_typenum) const + { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + return table[first_type_id][second_type_id]; + } + + FnT get(int first_typenum, int second_typenum) const + { + auto fn = get_unsafe(first_typenum, second_typenum); + + if (fn == nullptr) { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + py::dtype first_dtype = dtype_from_typenum(first_type_id); + auto first_type_pos = + std::find(supported_first_type.begin(), + supported_first_type.end(), first_dtype); + if (first_type_pos == supported_first_type.end()) { + py::str types = py::str(py::cast(supported_first_type)); + py::str dtype = py::str(first_dtype); + + py::str err_msg = + py::str("'" + first_name + "' has unsupported type '") + + dtype + + py::str("'." + " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::dtype second_dtype = dtype_from_typenum(second_type_id); + auto second_type_pos = + std::find(supported_second_type.begin(), + supported_second_type.end(), second_dtype); + if (second_type_pos == supported_second_type.end()) { + py::str types = py::str(py::cast(supported_second_type)); + py::str dtype = py::str(second_dtype); + + py::str err_msg = + py::str("'" + second_name + "' has unsupported type '") + + dtype + + py::str("'." 
+ " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::str first_dtype_str = py::str(first_dtype); + py::str second_dtype_str = py::str(second_dtype); + py::str types = py::str(py::cast(all_supported_types)); + + py::str err_msg = + py::str("'" + first_name + "' and '" + second_name + + "' has unsupported types combination: ('") + + first_dtype_str + py::str("', '") + second_dtype_str + + py::str("')." + " Supported types combinations are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + return fn; + } + + const SupportedDTypeList &get_supported_first_type() const + { + return supported_first_type; + } + + const SupportedDTypeList &get_supported_second_type() const + { + return supported_second_type; + } + + const SupportedDTypeList2 &get_all_supported_types() const + { + return all_supported_types; + } + +private: + void populate_supported_types() + { + SupportedTypesSet first_supported_types_set; + SupportedTypesSet second_supported_types_set; + SupportedTypesSet2 all_supported_types_set; + + for (int i = 0; i < dpctl_td_ns::num_types; ++i) { + for (int j = 0; j < dpctl_td_ns::num_types; ++j) { + if (table[i][j] != nullptr) { + all_supported_types_set.emplace(i, j); + first_supported_types_set.emplace(i); + second_supported_types_set.emplace(j); + } + } + } + + auto to_supported_dtype_list = [](const auto &supported_set, + auto &supported_list) { + SupportedTypesList lst(supported_set.begin(), supported_set.end()); + std::sort(lst.begin(), lst.end()); + supported_list.resize(supported_set.size()); + std::transform(lst.begin(), lst.end(), supported_list.begin(), + [](TypeId i) { return dtype_from_typenum(i); }); + }; + + to_supported_dtype_list(first_supported_types_set, + supported_first_type); + to_supported_dtype_list(second_supported_types_set, + supported_second_type); + + SupportedTypesList2 lst(all_supported_types_set.begin(), + all_supported_types_set.end()); + std::sort(lst.begin(), 
lst.end()); + all_supported_types.resize(all_supported_types_set.size()); + std::transform(lst.begin(), lst.end(), all_supported_types.begin(), + [](TypesPair p) { + return DTypePair(dtype_from_typenum(p.first), + dtype_from_typenum(p.second)); + }); + } + + std::string first_name; + std::string second_name; + + SupportedDTypeList supported_first_type; + SupportedDTypeList supported_second_type; + SupportedDTypeList2 all_supported_types; + + Table2 table; +}; + +} // namespace common +} // namespace math diff --git a/dpnp/backend/extensions/math/interpolate.cpp b/dpnp/backend/extensions/math/interpolate.cpp new file mode 100644 index 000000000000..9b69affc5ea0 --- /dev/null +++ b/dpnp/backend/extensions/math/interpolate.cpp @@ -0,0 +1,338 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include + +#include +#include + +// dpctl tensor headers +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" + +#include "interpolate.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; + +using namespace math::interpolate; + +namespace +{ + +template +struct interpolate_kernel +{ + static sycl::event impl(sycl::queue &exec_q, + const void *vx, + const void *vidx, + const void *vxp, + const void *vfp, + void *vout, + const size_t xp_size, + const size_t n, + const std::vector &depends) + { + const T *x = static_cast(vx); + const T *xp = static_cast(vxp); + const T *fp = static_cast(vfp); + const IndexT *idx = static_cast(vidx); + T *out = static_cast(vout); + + // size_t n = x.get_size(); + + // std::vector result(n); + // sycl::event copy_ev = exec_q.memcpy(result.data(), output.get_data(), n * sizeof(float), {ev}); + + // for (size_t i = 0; i< n; i++){ + // std::cout << result[i] << " "; + // } + + return exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { + // T left = fp[0]; + // T right = fp[xp_size - 1]; + + // // IndexT x_idx = idx[i] - 1; + + // // if (sycl::isnan(x[i])) { + // // out[i] = x[i]; + // // } + // // else if (x_idx < 0) { + // // out[i] = left; + // // } + 
// // //old version check + // // // else if (x[i] == xp[xp_size - 1]) { + // // // out[i] = right; + // // // } + // // // else if (idx[i] >= xp_size - 1) { + // // // out[i] = right; + // // // } + // // // new version check + // // else if (idx[i] == xp_size) { + // // out[i] = right; + // // } + // // else if (idx[i] == xp_size - 1) { + // // out[i] = fp[x_idx]; + // // } + // // else if (x[i] == xp[x_idx]) { + // // out[i] = fp[x_idx]; + // // } + + // IndexT j = idx[i]; + + // if (sycl::isnan(x[i])) { + // out[i] = x[i]; + // } + // else if (j == 0) { + // out[i] = left; + // } + // else if (j == xp_size) { + // out[i] = right; + // } + // else { + // IndexT x_idx = j - 1; + + // if (x[i] == xp[x_idx]) { + // out[i] = fp[x_idx]; + // } + // else { + // T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + // T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; + // // T res = (x[i] - xp[x_idx]) + fp[x_idx]; + + // if (sycl::isnan(res)) { + // res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; + // if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + // res = fp[x_idx]; + // } + // } + // out[i] = res; + // } + // } + + T left = fp[0]; + T right = fp[xp_size - 1]; + IndexT x_idx = idx[i] - 1; + + if (sycl::isnan(x[i])) { + out[i] = x[i]; + } + else if (x_idx < 0) { + out[i] = left; + } + else if (x[i] == xp[xp_size - 1]) { + out[i] = right; + } + else if (x_idx >= xp_size - 1) { + out[i] = right; + } + else if (x[i] == xp[x_idx]) { + out[i] = fp[x_idx]; + } + else { + T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; + + if (sycl::isnan(res)) { + res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; + if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + res = fp[x_idx]; + } + } + + out[i] = res; + } + + // out[i] = x[i]; + }); + }); + } +}; + +// using SupportedTypes = std::tuple, std::tuple>; +// using SupportedTypes = std::tuple, +// std::tuple, +// std::tuple, +// 
std::tuple, +// std::tuple, +// std::tuple, +// std::tuple>, +// std::tuple>, +// std::tuple>, +// std::tuple>, +// std::tuple, +// std::tuple, +// std::tuple, +// std::tuple, +// std::tuple>, +// std::tuple>, +// std::tuple, int64_t>, +// std::tuple, int64_t>, +// std::tuple, float>, +// std::tuple, double>>; + +using SupportedTypes = std::tuple< + // std::tuple, + // std::tuple, + std::tuple, + std::tuple>; + +} // namespace + +Interpolate::Interpolate() : dispatch_table("x", "idx") +{ + dispatch_table.populate_dispatch_table(); +} + +std::tuple +Interpolate::call(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const size_t xp_size, + dpctl::tensor::usm_ndarray &output, + const std::vector &depends) +{ + // validate(x, xp, fp, output); + + if (x.get_size() == 0) { + return {sycl::event(), sycl::event()}; + } + + const int x_typenum = x.get_typenum(); + const int idx_typenum = idx.get_typenum(); + + auto interp_func = dispatch_table.get(x_typenum, idx_typenum); + + auto exec_q = x.get_queue(); + + // size_t n = x.get_size(); + // const size_t m = xp.get_size(); + + // std::vector x_h(n); + // std::vector xp_h(n); + // std::vector fp_h(n); + // std::vector output_h(n); + + // sycl::event copy_1 = exec_q.memcpy(x_h.data(), x.get_data(), n * sizeof(float)); + // sycl::event copy_2 = exec_q.memcpy(xp_h.data(), xp.get_data(), m * sizeof(float)); + // sycl::event copy_3 = exec_q.memcpy(fp_h.data(), fp.get_data(), m * sizeof(float)); + // sycl::event copy_4 = exec_q.memcpy(output_h.data(), output.get_data(), n * sizeof(float)); + + // copy_1.wait(); + // copy_2.wait(); + // copy_3.wait(); + // copy_4.wait(); + + // std::cout << "x: " << std::endl; + // for (size_t i = 0; i< n; i++){ + // std::cout << x_h[i] << " "; + // } + // std::cout << "\n"; + + // std::cout << "xp: " << std::endl; + // for (size_t i = 0; i< m; i++){ + // std::cout << xp_h[i] << " "; + // } 
+ // std::cout << "\n"; + + + // std::cout << "fp: " << std::endl; + // for (size_t i = 0; i< m; i++){ + // std::cout << fp_h[i] << " "; + // } + // std::cout << "\n"; + + + // std::cout << "out: " << std::endl; + // for (size_t i = 0; i< n; i++){ + // std::cout << output_h[i] << " "; + // } + // std::cout << "\n"; + + auto ev = + interp_func(exec_q, x.get_data(), idx.get_data(), xp.get_data(), fp.get_data(), + output.get_data(), xp.get_size(), x.get_size(), depends); + + ev.wait(); + + auto args_ev = dpctl::utils::keep_args_alive( + exec_q, {x, idx, xp, fp, output}, {ev}); + + // size_t n = x.get_size(); + + // std::vector result(n); + // sycl::event copy_ev = exec_q.memcpy(result.data(), output.get_data(), n * sizeof(float), {ev}); + + // copy_ev.wait(); + + // std::cout << "out_host: " << std::endl; + // for (size_t i = 0; i< n; i++){ + // std::cout << result[i] << " "; + // } + + // std::cout << "\n"; + + return {args_ev, ev}; +} + +std::unique_ptr interp; + +void math::interpolate::populate_interpolate(py::module_ m) +{ + using namespace std::placeholders; + + interp.reset(new Interpolate()); + + auto interp_func = + [interpp = interp.get()]( + const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const size_t xp_size, + dpctl::tensor::usm_ndarray &output, + const std::vector &depends) { + return interpp->call(x, idx, xp, fp, xp_size, output, depends); + }; + + m.def("interpolate", interp_func, "Perform linear interpolation.", + py::arg("x"), py::arg("idx"), py::arg("xp"), py::arg("fp"), + py::arg("xp_size"), py::arg("output"), py::arg("depends") = py::list()); + + auto interpolate_dtypes = [interpp = interp.get()]() { + return interpp->dispatch_table.get_all_supported_types(); + }; + + m.def("interpolate_dtypes", interpolate_dtypes, + "Get the supported data types for interpolation."); +} diff --git a/dpnp/backend/extensions/math/interpolate.hpp 
b/dpnp/backend/extensions/math/interpolate.hpp new file mode 100644 index 000000000000..95c34d945dad --- /dev/null +++ b/dpnp/backend/extensions/math/interpolate.hpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +#include "dispatch_table.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace math +{ +namespace interpolate +{ +struct Interpolate +{ + using FnT = sycl::event (*)(sycl::queue &, + const void *, + const void *, + const void *, + const void *, + void *, + const size_t, + const size_t, + const std::vector &); + + common::DispatchTable2 dispatch_table; + + Interpolate(); + + std::tuple + call(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const size_t xp_size, + dpctl::tensor::usm_ndarray &output, + const std::vector &depends); +}; + +void populate_interpolate(py::module_ m); +} // namespace interpolate +} // namespace math diff --git a/dpnp/backend/extensions/math/math_py.cpp b/dpnp/backend/extensions/math/math_py.cpp new file mode 100644 index 000000000000..6b8b5b9b5f28 --- /dev/null +++ b/dpnp/backend/extensions/math/math_py.cpp @@ -0,0 +1,37 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._math_impl extensions +// +//***************************************************************************** + +#include + +#include "interpolate.hpp" + +PYBIND11_MODULE(_math_impl, m) +{ + math::interpolate::populate_interpolate(m); +} diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 3958127789ab..08b830b1d360 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -54,6 +54,8 @@ to_supported_dtypes, ) +import dpnp.backend.extensions.math._math_impl as math_ext + from .dpnp_utils import get_usm_allocations from .dpnp_utils.dpnp_utils_reduction import dpnp_wrap_reduction_call from .dpnp_utils.dpnp_utils_statistics import dpnp_cov, dpnp_median @@ -66,6 +68,7 @@ "corrcoef", "correlate", "cov", + "interp", "max", "mean", "median", @@ -1051,6 +1054,178 @@ def cov( ) +def interp(x, xp, fp, left=None, right=None, period=None): + """ + One-dimensional linear interpolation for monotonically increasing sample points. 
+ + Returns the one-dimensional piecewise linear interpolant to a function + with given discrete data points (`xp`, `fp`), evaluated at `x`. + + Parameters + ---------- + x : array_like + The x-coordinates at which to evaluate the interpolated values. + + xp : 1-D sequence of floats + The x-coordinates of the data points, must be increasing if argument + `period` is not specified. Otherwise, `xp` is internally sorted after + normalizing the periodic boundaries with ``xp = xp % period``. + + fp : 1-D sequence of float or complex + The y-coordinates of the data points, same length as `xp`. + + left : optional float or complex corresponding to fp + Value to return for `x < xp[0]`, default is `fp[0]`. + + right : optional float or complex corresponding to fp + Value to return for `x > xp[-1]`, default is `fp[-1]`. + + period : None or float, optional + A period for the x-coordinates. This parameter allows the proper + interpolation of angular x-coordinates. Parameters `left` and `right` + are ignored if `period` is specified. + + Returns + ------- + y : float or complex (corresponding to fp) or ndarray + The interpolated values, same shape as `x`. + + Raises + ------ + ValueError + If `xp` and `fp` have different length + If `xp` or `fp` are not 1-D sequences + If `period == 0` + + See Also + -------- + scipy.interpolate + + Warnings + -------- + The x-coordinate sequence is expected to be increasing, but this is not + explicitly enforced. However, if the sequence `xp` is non-increasing, + interpolation results are meaningless. + + Note that, since NaN is unsortable, `xp` also cannot contain NaNs. + + A simple check for `xp` being strictly increasing is:: + + np.all(np.diff(xp) > 0) + + Examples + -------- + >>> import dpnp as np + >>> xp = np.array([1, 2, 3]) + >>> fp = np.array([3 ,2 ,0]) + >>> x = np.array([2.5]) + >>> np.interp(2.5, xp, fp) + 1.0 + >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) + array([3. , 3. , 2.5 , 0.56, 0. 
]) + >>> UNDEF = -99.0 + >>> np.interp(3.14, xp, fp, right=UNDEF) + -99.0 + + Plot an interpolant to the sine function: + + >>> x = np.linspace(0, 2*np.pi, 10) + >>> y = np.sin(x) + >>> xvals = np.linspace(0, 2*np.pi, 50) + >>> yinterp = np.interp(xvals, x, y) + >>> import matplotlib.pyplot as plt + >>> plt.plot(x, y, 'o') + [] + >>> plt.plot(xvals, yinterp, '-x') + [] + >>> plt.show() + + Interpolation with periodic x-coordinates: + + >>> x = [-180, -170, -185, 185, -10, -5, 0, 365] + >>> xp = [190, -190, 350, -350] + >>> fp = [5, 10, 3, 4] + >>> np.interp(x, xp, fp, period=360) + array([7.5 , 5. , 8.75, 6.25, 3. , 3.25, 3.5 , 3.75]) + + Complex interpolation: + + >>> x = [1.5, 4.0] + >>> xp = [2,3,5] + >>> fp = [1.0j, 0, 2+3j] + >>> np.interp(x, xp, fp) + array([0.+1.j , 1.+1.5j]) + + """ + + dpnp.check_supported_arrays_type(x, xp, fp) + + if xp.ndim != 1 or fp.ndim != 1: + raise ValueError('xp and fp must be 1D arrays') + if xp.size != fp.size: + raise ValueError('fp and xp are not of the same length') + if xp.size == 0: + raise ValueError('array of sample points is empty') + if not x.flags.c_contiguous: + raise NotImplementedError('Non-C-contiguous x is currently not ' + 'supported') + x_dtype = dpnp.common_type(x, xp) + if not dpnp.can_cast(x_dtype, dpnp.default_float_type()): + raise TypeError('Cannot cast array data from' + ' {} to {} according to the rule \'safe\'' + .format(x_dtype, dpnp.default_float_type())) + + if period is not None: + # The handling of "period" below is modified from NumPy's + + if period == 0: + raise ValueError("period must be a non-zero value") + period = dpnp.abs(period) + left = None + right = None + + x = x.astype(dpnp.default_float_type()) + xp = xp.astype(dpnp.default_float_type()) + + # normalizing periodic boundaries + x %= period + xp %= period + asort_xp = dpnp.argsort(xp) + xp = xp[asort_xp] + fp = fp[asort_xp] + xp = dpnp.concatenate((xp[-1:]-period, xp, xp[0:1]+period)) + fp = dpnp.concatenate((fp[-1:], fp, fp[0:1])) + 
assert xp.flags.c_contiguous + assert fp.flags.c_contiguous + + # NumPy always returns float64 or complex128, so we upcast all values + # on the fly in the kernel + out_dtype = 'f8' + output = dpnp.empty(x.shape, dtype=out_dtype) + idx = dpnp.searchsorted(xp, x, side='right') + left = fp[0] if left is None else dpnp.array(left, fp.dtype) + right = fp[-1] if right is None else dpnp.array(right, fp.dtype) + + idx = dpnp.array(idx, dtype='uint64') + + queue = x.sycl_queue + _manager = dpu.SequentialOrderManager[queue] + mem_ev, ht_ev = math_ext.interpolate( + x.get_array(), + idx.get_array(), + xp.get_array(), + fp.get_array(), + xp.size, + # left, + # right, + output.get_array(), + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + return output + + def max(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the maximum of an array or maximum along an axis. From e2b20b0bd6c2711b47983483f35361d172765730 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 04:31:53 -0700 Subject: [PATCH 02/37] Second impl with dispatch_vector[only floating] --- dpnp/backend/extensions/math/interpolate.cpp | 322 +++--------------- dpnp/backend/extensions/math/interpolate.hpp | 41 +-- .../extensions/math/interpolate_kernel.hpp | 80 +++++ dpnp/backend/extensions/math/math_py.cpp | 2 +- dpnp/dpnp_iface_statistics.py | 6 +- .../third_party/cupy/test_type_routines.py | 4 + 6 files changed, 142 insertions(+), 313 deletions(-) create mode 100644 dpnp/backend/extensions/math/interpolate_kernel.hpp diff --git a/dpnp/backend/extensions/math/interpolate.cpp b/dpnp/backend/extensions/math/interpolate.cpp index 9b69affc5ea0..a620299bfffa 100644 --- a/dpnp/backend/extensions/math/interpolate.cpp +++ b/dpnp/backend/extensions/math/interpolate.cpp @@ -37,302 +37,78 @@ #include "utils/type_dispatch.hpp" #include "interpolate.hpp" +#include "interpolate_kernel.hpp" -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; 
-using dpctl::tensor::usm_ndarray; +namespace dpnp::extensions::math +{ -using namespace math::interpolate; +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; -namespace -{ +static kernels::interpolate_fn_ptr_t interpolate_dispatch_table[td_ns::num_types]; -template -struct interpolate_kernel +template +struct InterpolateFactory { - static sycl::event impl(sycl::queue &exec_q, - const void *vx, - const void *vidx, - const void *vxp, - const void *vfp, - void *vout, - const size_t xp_size, - const size_t n, - const std::vector &depends) + fnT get() { - const T *x = static_cast(vx); - const T *xp = static_cast(vxp); - const T *fp = static_cast(vfp); - const IndexT *idx = static_cast(vidx); - T *out = static_cast(vout); - - // size_t n = x.get_size(); - - // std::vector result(n); - // sycl::event copy_ev = exec_q.memcpy(result.data(), output.get_data(), n * sizeof(float), {ev}); - - // for (size_t i = 0; i< n; i++){ - // std::cout << result[i] << " "; - // } - - return exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - - cgh.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - // T left = fp[0]; - // T right = fp[xp_size - 1]; - - // // IndexT x_idx = idx[i] - 1; - - // // if (sycl::isnan(x[i])) { - // // out[i] = x[i]; - // // } - // // else if (x_idx < 0) { - // // out[i] = left; - // // } - // // //old version check - // // // else if (x[i] == xp[xp_size - 1]) { - // // // out[i] = right; - // // // } - // // // else if (idx[i] >= xp_size - 1) { - // // // out[i] = right; - // // // } - // // // new version check - // // else if (idx[i] == xp_size) { - // // out[i] = right; - // // } - // // else if (idx[i] == xp_size - 1) { - // // out[i] = fp[x_idx]; - // // } - // // else if (x[i] == xp[x_idx]) { - // // out[i] = fp[x_idx]; - // // } - - // IndexT j = idx[i]; - - // if (sycl::isnan(x[i])) { - // out[i] = x[i]; - // } - // else if (j == 0) { - // out[i] = left; - // } - // else if (j == xp_size) { - // out[i] = right; 
- // } - // else { - // IndexT x_idx = j - 1; - - // if (x[i] == xp[x_idx]) { - // out[i] = fp[x_idx]; - // } - // else { - // T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); - // T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; - // // T res = (x[i] - xp[x_idx]) + fp[x_idx]; - - // if (sycl::isnan(res)) { - // res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; - // if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { - // res = fp[x_idx]; - // } - // } - // out[i] = res; - // } - // } - - T left = fp[0]; - T right = fp[xp_size - 1]; - IndexT x_idx = idx[i] - 1; - - if (sycl::isnan(x[i])) { - out[i] = x[i]; - } - else if (x_idx < 0) { - out[i] = left; - } - else if (x[i] == xp[xp_size - 1]) { - out[i] = right; - } - else if (x_idx >= xp_size - 1) { - out[i] = right; - } - else if (x[i] == xp[x_idx]) { - out[i] = fp[x_idx]; - } - else { - T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); - T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; - - if (sycl::isnan(res)) { - res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; - if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { - res = fp[x_idx]; - } - } - - out[i] = res; - } - - // out[i] = x[i]; - }); - }); + if constexpr (std::is_floating_point_v) { + return kernels::interpolate_impl; + } + else { + return nullptr; + } } }; -// using SupportedTypes = std::tuple, std::tuple>; -// using SupportedTypes = std::tuple, -// std::tuple, -// std::tuple, -// std::tuple, -// std::tuple, -// std::tuple, -// std::tuple>, -// std::tuple>, -// std::tuple>, -// std::tuple>, -// std::tuple, -// std::tuple, -// std::tuple, -// std::tuple, -// std::tuple>, -// std::tuple>, -// std::tuple, int64_t>, -// std::tuple, int64_t>, -// std::tuple, float>, -// std::tuple, double>>; - -using SupportedTypes = std::tuple< - // std::tuple, - // std::tuple, - std::tuple, - std::tuple>; - -} // namespace - -Interpolate::Interpolate() : dispatch_table("x", "idx") -{ - 
dispatch_table.populate_dispatch_table(); -} -std::tuple -Interpolate::call(const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - const size_t xp_size, - dpctl::tensor::usm_ndarray &output, - const std::vector &depends) +std::pair +py_interpolate(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + dpctl::tensor::usm_ndarray &out, + sycl::queue &exec_q, + const std::vector &depends) { - // validate(x, xp, fp, output); + int typenum = x.get_typenum(); + auto array_types = td_ns::usm_ndarray_types(); + int type_id = array_types.typenum_to_lookup_id(typenum); - if (x.get_size() == 0) { - return {sycl::event(), sycl::event()}; + auto fn = interpolate_dispatch_table[type_id]; + if (!fn) { + throw py::type_error("Unsupported dtype."); } - const int x_typenum = x.get_typenum(); - const int idx_typenum = idx.get_typenum(); + std::size_t n = x.get_size(); + std::size_t xp_size = xp.get_size(); - auto interp_func = dispatch_table.get(x_typenum, idx_typenum); + sycl::event ev = fn(exec_q, x.get_data(), idx.get_data(), xp.get_data(), + fp.get_data(), out.get_data(), n, xp_size, depends); - auto exec_q = x.get_queue(); + sycl::event keep = dpctl::utils::keep_args_alive(exec_q, {x, idx, xp, fp, out}, {ev}); - // size_t n = x.get_size(); - // const size_t m = xp.get_size(); - - // std::vector x_h(n); - // std::vector xp_h(n); - // std::vector fp_h(n); - // std::vector output_h(n); - - // sycl::event copy_1 = exec_q.memcpy(x_h.data(), x.get_data(), n * sizeof(float)); - // sycl::event copy_2 = exec_q.memcpy(xp_h.data(), xp.get_data(), m * sizeof(float)); - // sycl::event copy_3 = exec_q.memcpy(fp_h.data(), fp.get_data(), m * sizeof(float)); - // sycl::event copy_4 = exec_q.memcpy(output_h.data(), output.get_data(), n * sizeof(float)); - - // copy_1.wait(); - // 
copy_2.wait(); - // copy_3.wait(); - // copy_4.wait(); - - // std::cout << "x: " << std::endl; - // for (size_t i = 0; i< n; i++){ - // std::cout << x_h[i] << " "; - // } - // std::cout << "\n"; - - // std::cout << "xp: " << std::endl; - // for (size_t i = 0; i< m; i++){ - // std::cout << xp_h[i] << " "; - // } - // std::cout << "\n"; - - - // std::cout << "fp: " << std::endl; - // for (size_t i = 0; i< m; i++){ - // std::cout << fp_h[i] << " "; - // } - // std::cout << "\n"; - - - // std::cout << "out: " << std::endl; - // for (size_t i = 0; i< n; i++){ - // std::cout << output_h[i] << " "; - // } - // std::cout << "\n"; - - auto ev = - interp_func(exec_q, x.get_data(), idx.get_data(), xp.get_data(), fp.get_data(), - output.get_data(), xp.get_size(), x.get_size(), depends); - - ev.wait(); - - auto args_ev = dpctl::utils::keep_args_alive( - exec_q, {x, idx, xp, fp, output}, {ev}); - - // size_t n = x.get_size(); - - // std::vector result(n); - // sycl::event copy_ev = exec_q.memcpy(result.data(), output.get_data(), n * sizeof(float), {ev}); - - // copy_ev.wait(); - - // std::cout << "out_host: " << std::endl; - // for (size_t i = 0; i< n; i++){ - // std::cout << result[i] << " "; - // } - - // std::cout << "\n"; - - return {args_ev, ev}; + return std::make_pair(keep, ev); } -std::unique_ptr interp; -void math::interpolate::populate_interpolate(py::module_ m) +void init_interpolate_dispatch_table() { - using namespace std::placeholders; + using namespace td_ns; + using kernels::interpolate_fn_ptr_t; - interp.reset(new Interpolate()); + DispatchVectorBuilder + dtb_interpolate; + dtb_interpolate.populate_dispatch_vector(interpolate_dispatch_table); +} - auto interp_func = - [interpp = interp.get()]( - const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - const size_t xp_size, - dpctl::tensor::usm_ndarray &output, - const std::vector &depends) { - return 
interpp->call(x, idx, xp, fp, xp_size, output, depends); - }; +void init_interpolate(py::module_ m) +{ + dpnp::extensions::math::init_interpolate_dispatch_table(); - m.def("interpolate", interp_func, "Perform linear interpolation.", + m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), py::arg("xp"), py::arg("fp"), - py::arg("xp_size"), py::arg("output"), py::arg("depends") = py::list()); - - auto interpolate_dtypes = [interpp = interp.get()]() { - return interpp->dispatch_table.get_all_supported_types(); - }; - - m.def("interpolate_dtypes", interpolate_dtypes, - "Get the supported data types for interpolation."); + py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } + +} // namespace dpnp::extensions::math diff --git a/dpnp/backend/extensions/math/interpolate.hpp b/dpnp/backend/extensions/math/interpolate.hpp index 95c34d945dad..e5df239aabdf 100644 --- a/dpnp/backend/extensions/math/interpolate.hpp +++ b/dpnp/backend/extensions/math/interpolate.hpp @@ -25,42 +25,11 @@ #pragma once -#include +#include -#include "dispatch_table.hpp" +namespace py = pybind11; -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -namespace math -{ -namespace interpolate +namespace dpnp::extensions::math { -struct Interpolate -{ - using FnT = sycl::event (*)(sycl::queue &, - const void *, - const void *, - const void *, - const void *, - void *, - const size_t, - const size_t, - const std::vector &); - - common::DispatchTable2 dispatch_table; - - Interpolate(); - - std::tuple - call(const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - const size_t xp_size, - dpctl::tensor::usm_ndarray &output, - const std::vector &depends); -}; - -void populate_interpolate(py::module_ m); -} // namespace interpolate -} // namespace math +void init_interpolate(py::module_ m); +} // namespace dpnp::extensions::math diff --git 
a/dpnp/backend/extensions/math/interpolate_kernel.hpp b/dpnp/backend/extensions/math/interpolate_kernel.hpp new file mode 100644 index 000000000000..bf63de08c280 --- /dev/null +++ b/dpnp/backend/extensions/math/interpolate_kernel.hpp @@ -0,0 +1,80 @@ +#pragma once + +#include +#include +#include +#include + +#include + +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::math::kernels +{ + +using interpolate_fn_ptr_t = sycl::event (*)(sycl::queue &, + const void *, // x + const void *, // idx + const void *, // xp + const void *, // fp + void *, // out + std::size_t, // n + std::size_t, // xp_size + const std::vector &); + +template +sycl::event interpolate_impl(sycl::queue &q, + const void *vx, + const void *vidx, + const void *vxp, + const void *vfp, + void *vout, + std::size_t n, + std::size_t xp_size, + const std::vector &depends) +{ + const T *x = static_cast(vx); + const std::size_t *idx = static_cast(vidx); + const T *xp = static_cast(vxp); + const T *fp = static_cast(vfp); + T *out = static_cast(vout); + + return q.submit([&](sycl::handler &h) { + h.depends_on(depends); + h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { + T left = fp[0]; + T right = fp[xp_size - 1]; + std::size_t x_idx = idx[i] - 1; + + if (sycl::isnan(x[i])) { + out[i] = x[i]; + } + else if (x_idx < 0) { + out[i] = left; + } + else if (x[i] == xp[xp_size - 1]) { + out[i] = right; + } + else if (x_idx >= xp_size - 1) { + out[i] = right; + } + else if (x[i] == xp[x_idx]) { + out[i] = fp[x_idx]; + } + else { + T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; + + if (sycl::isnan(res)) { + res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; + if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + res = fp[x_idx]; + } + } + out[i] = res; + } + }); + }); +} + +} // namespace dpnp::extensions::math::kernels diff --git a/dpnp/backend/extensions/math/math_py.cpp b/dpnp/backend/extensions/math/math_py.cpp 
index 6b8b5b9b5f28..29348d9e437c 100644 --- a/dpnp/backend/extensions/math/math_py.cpp +++ b/dpnp/backend/extensions/math/math_py.cpp @@ -33,5 +33,5 @@ PYBIND11_MODULE(_math_impl, m) { - math::interpolate::populate_interpolate(m); + dpnp::extensions::math::init_interpolate(m); } diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 08b830b1d360..72da99ca9e86 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -1200,7 +1200,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): # NumPy always returns float64 or complex128, so we upcast all values # on the fly in the kernel - out_dtype = 'f8' + out_dtype = x_dtype output = dpnp.empty(x.shape, dtype=out_dtype) idx = dpnp.searchsorted(xp, x, side='right') left = fp[0] if left is None else dpnp.array(left, fp.dtype) @@ -1210,15 +1210,15 @@ def interp(x, xp, fp, left=None, right=None, period=None): queue = x.sycl_queue _manager = dpu.SequentialOrderManager[queue] - mem_ev, ht_ev = math_ext.interpolate( + mem_ev, ht_ev = math_ext._interpolate( x.get_array(), idx.get_array(), xp.get_array(), fp.get_array(), - xp.size, # left, # right, output.get_array(), + queue, depends=_manager.submitted_events, ) _manager.add_event_pair(mem_ev, ht_ev) diff --git a/dpnp/tests/third_party/cupy/test_type_routines.py b/dpnp/tests/third_party/cupy/test_type_routines.py index e35b40d90841..bf5c7af9ded0 100644 --- a/dpnp/tests/third_party/cupy/test_type_routines.py +++ b/dpnp/tests/third_party/cupy/test_type_routines.py @@ -47,6 +47,10 @@ def test_can_cast(self, xp, from_dtype, to_dtype): return ret +<<<<<<< HEAD +======= +# @pytest.mark.skip("dpnp.common_type() is not implemented yet") +>>>>>>> e8871f0d797 (Second impl with dispatch_vector[only floating]) class TestCommonType(unittest.TestCase): @testing.numpy_cupy_equal() From f7d1da94dfbaf36ada8bf0d6ec73b75406970f0c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 04:32:50 -0700 Subject: [PATCH 
03/37] Implement interpolate_complex --- dpnp/backend/extensions/math/CMakeLists.txt | 1 - dpnp/backend/extensions/math/common.cpp | 75 ----- dpnp/backend/extensions/math/common.hpp | 93 ------ .../extensions/math/dispatch_table.hpp | 292 ------------------ dpnp/backend/extensions/math/interpolate.cpp | 79 ++--- .../extensions/math/interpolate_kernel.hpp | 132 ++++++-- dpnp/dpnp_iface_statistics.py | 30 +- 7 files changed, 166 insertions(+), 536 deletions(-) delete mode 100644 dpnp/backend/extensions/math/common.cpp delete mode 100644 dpnp/backend/extensions/math/common.hpp delete mode 100644 dpnp/backend/extensions/math/dispatch_table.hpp diff --git a/dpnp/backend/extensions/math/CMakeLists.txt b/dpnp/backend/extensions/math/CMakeLists.txt index fed91cfd3f9a..eed898b12496 100644 --- a/dpnp/backend/extensions/math/CMakeLists.txt +++ b/dpnp/backend/extensions/math/CMakeLists.txt @@ -26,7 +26,6 @@ set(python_module_name _math_impl) set(_module_src - ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cpp ${CMAKE_CURRENT_SOURCE_DIR}/math_py.cpp ) diff --git a/dpnp/backend/extensions/math/common.cpp b/dpnp/backend/extensions/math/common.cpp deleted file mode 100644 index 93723e838b21..000000000000 --- a/dpnp/backend/extensions/math/common.cpp +++ /dev/null @@ -1,75 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include "common.hpp" -#include "utils/type_dispatch.hpp" -#include - -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -namespace math -{ -namespace common -{ -pybind11::dtype dtype_from_typenum(int dst_typenum) -{ - dpctl_td_ns::typenum_t dst_typenum_t = - static_cast(dst_typenum); - switch (dst_typenum_t) { - case dpctl_td_ns::typenum_t::BOOL: - return py::dtype("?"); - case dpctl_td_ns::typenum_t::INT8: - return py::dtype("i1"); - case dpctl_td_ns::typenum_t::UINT8: - return py::dtype("u1"); - case dpctl_td_ns::typenum_t::INT16: - return py::dtype("i2"); - case dpctl_td_ns::typenum_t::UINT16: - return py::dtype("u2"); - case dpctl_td_ns::typenum_t::INT32: - return py::dtype("i4"); - case dpctl_td_ns::typenum_t::UINT32: - return py::dtype("u4"); - case dpctl_td_ns::typenum_t::INT64: - return py::dtype("i8"); - case dpctl_td_ns::typenum_t::UINT64: - return py::dtype("u8"); - case dpctl_td_ns::typenum_t::HALF: - return py::dtype("f2"); - case dpctl_td_ns::typenum_t::FLOAT: - return py::dtype("f4"); - case dpctl_td_ns::typenum_t::DOUBLE: - return py::dtype("f8"); - case 
dpctl_td_ns::typenum_t::CFLOAT: - return py::dtype("c8"); - case dpctl_td_ns::typenum_t::CDOUBLE: - return py::dtype("c16"); - default: - throw py::value_error("Unrecognized dst_typeid"); - } -} - -} // namespace common -} // namespace math diff --git a/dpnp/backend/extensions/math/common.hpp b/dpnp/backend/extensions/math/common.hpp deleted file mode 100644 index 1d436440c392..000000000000 --- a/dpnp/backend/extensions/math/common.hpp +++ /dev/null @@ -1,93 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. 
-//***************************************************************************** - -#pragma once - -#include -#include -#include - -// clang-format off -// math_utils.hpp doesn't include sycl header but uses sycl types -// so sycl.hpp must be included before math_utils.hpp -#include -#include "utils/math_utils.hpp" -// clang-format on - -namespace math -{ -namespace common -{ -template -struct Less -{ - bool operator()(const T &lhs, const T &rhs) const - { - return std::less{}(lhs, rhs); - } -}; - -template -struct Less> -{ - bool operator()(const std::complex &lhs, - const std::complex &rhs) const - { - return dpctl::tensor::math_utils::less_complex(lhs, rhs); - } -}; - -template -struct IsNan -{ - static bool isnan(const T &v) - { - if constexpr (std::is_floating_point_v || - std::is_same_v) { - return sycl::isnan(v); - } - - return false; - } -}; - -template -struct IsNan> -{ - static bool isnan(const std::complex &v) - { - T real1 = std::real(v); - T imag1 = std::imag(v); - return sycl::isnan(real1) || sycl::isnan(imag1); - } -}; - - -// This function is a copy from dpctl because it is not available in the public -// headers of dpctl. -pybind11::dtype dtype_from_typenum(int dst_typenum); - -} // namespace common -} // namespace math diff --git a/dpnp/backend/extensions/math/dispatch_table.hpp b/dpnp/backend/extensions/math/dispatch_table.hpp deleted file mode 100644 index 4cfd3d2a09a4..000000000000 --- a/dpnp/backend/extensions/math/dispatch_table.hpp +++ /dev/null @@ -1,292 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. 
-//***************************************************************************** - -#pragma once - -#include -#include - -#include "utils/type_dispatch.hpp" -#include -#include -#include -#include - -#include "common.hpp" - -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; -namespace py = pybind11; - -namespace math -{ -namespace common -{ - -template -struct one_of -{ - static_assert(std::is_same_v>, - "one_of: second parameter cannot be empty std::tuple"); - static_assert(false, "one_of: second parameter must be std::tuple"); -}; - -template -struct one_of> -{ - static constexpr bool value = - std::is_same_v || one_of>::value; -}; - -template -struct one_of> -{ - static constexpr bool value = std::is_same_v; -}; - -template -constexpr bool one_of_v = one_of::value; - -template -using Table = FnT[dpctl_td_ns::num_types]; -template -using Table2 = Table[dpctl_td_ns::num_types]; - -using TypeId = int32_t; -using TypesPair = std::pair; - -struct int_pair_hash -{ - inline size_t operator()(const TypesPair &p) const - { - std::hash hasher; - return hasher(size_t(p.first) << (8 * sizeof(TypeId)) | - size_t(p.second)); - } -}; - -using SupportedTypesList = std::vector; -using SupportedTypesList2 = std::vector; -using SupportedTypesSet = std::unordered_set; -using SupportedTypesSet2 = std::unordered_set; - -using DType = py::dtype; -using DTypePair = std::pair; - -using SupportedDTypeList = std::vector; -using SupportedDTypeList2 = std::vector; - -template - typename Func> -struct TableBuilder2 -{ - template - struct impl - { - static constexpr bool is_defined = - one_of_v, SupportedTypes>; - - _FnT get() - { - if constexpr (is_defined) { - return Func::impl; - } - else { - return nullptr; - } - } - }; - - using type = - dpctl_td_ns::DispatchTableBuilder; -}; - -template -class DispatchTable2 -{ -public: - DispatchTable2(std::string first_name, std::string second_name) - : first_name(first_name), second_name(second_name) - { - } - - template - typename Func> - void 
populate_dispatch_table() - { - using TBulder = typename TableBuilder2::type; - TBulder builder; - - builder.populate_dispatch_table(table); - populate_supported_types(); - } - - FnT get_unsafe(int first_typenum, int second_typenum) const - { - auto array_types = dpctl_td_ns::usm_ndarray_types(); - const int first_type_id = - array_types.typenum_to_lookup_id(first_typenum); - const int second_type_id = - array_types.typenum_to_lookup_id(second_typenum); - - return table[first_type_id][second_type_id]; - } - - FnT get(int first_typenum, int second_typenum) const - { - auto fn = get_unsafe(first_typenum, second_typenum); - - if (fn == nullptr) { - auto array_types = dpctl_td_ns::usm_ndarray_types(); - const int first_type_id = - array_types.typenum_to_lookup_id(first_typenum); - const int second_type_id = - array_types.typenum_to_lookup_id(second_typenum); - - py::dtype first_dtype = dtype_from_typenum(first_type_id); - auto first_type_pos = - std::find(supported_first_type.begin(), - supported_first_type.end(), first_dtype); - if (first_type_pos == supported_first_type.end()) { - py::str types = py::str(py::cast(supported_first_type)); - py::str dtype = py::str(first_dtype); - - py::str err_msg = - py::str("'" + first_name + "' has unsupported type '") + - dtype + - py::str("'." - " Supported types are: ") + - types; - - throw py::value_error(static_cast(err_msg)); - } - - py::dtype second_dtype = dtype_from_typenum(second_type_id); - auto second_type_pos = - std::find(supported_second_type.begin(), - supported_second_type.end(), second_dtype); - if (second_type_pos == supported_second_type.end()) { - py::str types = py::str(py::cast(supported_second_type)); - py::str dtype = py::str(second_dtype); - - py::str err_msg = - py::str("'" + second_name + "' has unsupported type '") + - dtype + - py::str("'." 
- " Supported types are: ") + - types; - - throw py::value_error(static_cast(err_msg)); - } - - py::str first_dtype_str = py::str(first_dtype); - py::str second_dtype_str = py::str(second_dtype); - py::str types = py::str(py::cast(all_supported_types)); - - py::str err_msg = - py::str("'" + first_name + "' and '" + second_name + - "' has unsupported types combination: ('") + - first_dtype_str + py::str("', '") + second_dtype_str + - py::str("')." - " Supported types combinations are: ") + - types; - - throw py::value_error(static_cast(err_msg)); - } - - return fn; - } - - const SupportedDTypeList &get_supported_first_type() const - { - return supported_first_type; - } - - const SupportedDTypeList &get_supported_second_type() const - { - return supported_second_type; - } - - const SupportedDTypeList2 &get_all_supported_types() const - { - return all_supported_types; - } - -private: - void populate_supported_types() - { - SupportedTypesSet first_supported_types_set; - SupportedTypesSet second_supported_types_set; - SupportedTypesSet2 all_supported_types_set; - - for (int i = 0; i < dpctl_td_ns::num_types; ++i) { - for (int j = 0; j < dpctl_td_ns::num_types; ++j) { - if (table[i][j] != nullptr) { - all_supported_types_set.emplace(i, j); - first_supported_types_set.emplace(i); - second_supported_types_set.emplace(j); - } - } - } - - auto to_supported_dtype_list = [](const auto &supported_set, - auto &supported_list) { - SupportedTypesList lst(supported_set.begin(), supported_set.end()); - std::sort(lst.begin(), lst.end()); - supported_list.resize(supported_set.size()); - std::transform(lst.begin(), lst.end(), supported_list.begin(), - [](TypeId i) { return dtype_from_typenum(i); }); - }; - - to_supported_dtype_list(first_supported_types_set, - supported_first_type); - to_supported_dtype_list(second_supported_types_set, - supported_second_type); - - SupportedTypesList2 lst(all_supported_types_set.begin(), - all_supported_types_set.end()); - std::sort(lst.begin(), 
lst.end()); - all_supported_types.resize(all_supported_types_set.size()); - std::transform(lst.begin(), lst.end(), all_supported_types.begin(), - [](TypesPair p) { - return DTypePair(dtype_from_typenum(p.first), - dtype_from_typenum(p.second)); - }); - } - - std::string first_name; - std::string second_name; - - SupportedDTypeList supported_first_type; - SupportedDTypeList supported_second_type; - SupportedDTypeList2 all_supported_types; - - Table2 table; -}; - -} // namespace common -} // namespace math diff --git a/dpnp/backend/extensions/math/interpolate.cpp b/dpnp/backend/extensions/math/interpolate.cpp index a620299bfffa..c1549826e77c 100644 --- a/dpnp/backend/extensions/math/interpolate.cpp +++ b/dpnp/backend/extensions/math/interpolate.cpp @@ -23,10 +23,6 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include -#include -#include -#include #include #include @@ -45,37 +41,26 @@ namespace dpnp::extensions::math namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -static kernels::interpolate_fn_ptr_t interpolate_dispatch_table[td_ns::num_types]; - -template -struct InterpolateFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return kernels::interpolate_impl; - } - else { - return nullptr; - } - } -}; - +static kernels::interpolate_fn_ptr_t + interpolate_dispatch_table[td_ns::num_types][td_ns::num_types]; std::pair -py_interpolate(const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - dpctl::tensor::usm_ndarray &out, - sycl::queue &exec_q, - const std::vector &depends) + py_interpolate(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + dpctl::tensor::usm_ndarray &out, + sycl::queue &exec_q, + const std::vector &depends) { - int typenum = 
x.get_typenum(); + int xp_typenum = xp.get_typenum(); + int fp_typenum = fp.get_typenum(); + auto array_types = td_ns::usm_ndarray_types(); - int type_id = array_types.typenum_to_lookup_id(typenum); + int xp_type_id = array_types.typenum_to_lookup_id(xp_typenum); + int fp_type_id = array_types.typenum_to_lookup_id(fp_typenum); - auto fn = interpolate_dispatch_table[type_id]; + auto fn = interpolate_dispatch_table[xp_type_id][fp_type_id]; if (!fn) { throw py::type_error("Unsupported dtype."); } @@ -86,29 +71,51 @@ py_interpolate(const dpctl::tensor::usm_ndarray &x, sycl::event ev = fn(exec_q, x.get_data(), idx.get_data(), xp.get_data(), fp.get_data(), out.get_data(), n, xp_size, depends); - sycl::event keep = dpctl::utils::keep_args_alive(exec_q, {x, idx, xp, fp, out}, {ev}); + sycl::event keep = + dpctl::utils::keep_args_alive(exec_q, {x, idx, xp, fp, out}, {ev}); return std::make_pair(keep, ev); } +template +struct InterpolateFactory +{ + fnT get() + { + if constexpr (std::is_floating_point_v && + std::is_floating_point_v) + { + return kernels::interpolate_impl; + } + else if constexpr (std::is_floating_point_v && + (std::is_same_v> || + std::is_same_v>)) + { + return kernels::interpolate_complex_impl; + } + else { + return nullptr; + } + } +}; void init_interpolate_dispatch_table() { using namespace td_ns; using kernels::interpolate_fn_ptr_t; - DispatchVectorBuilder + DispatchTableBuilder dtb_interpolate; - dtb_interpolate.populate_dispatch_vector(interpolate_dispatch_table); + dtb_interpolate.populate_dispatch_table(interpolate_dispatch_table); } void init_interpolate(py::module_ m) { dpnp::extensions::math::init_interpolate_dispatch_table(); - m.def("_interpolate", &py_interpolate, "", - py::arg("x"), py::arg("idx"), py::arg("xp"), py::arg("fp"), - py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), + py::arg("xp"), py::arg("fp"), py::arg("out"), 
py::arg("sycl_queue"), + py::arg("depends") = py::list()); } } // namespace dpnp::extensions::math diff --git a/dpnp/backend/extensions/math/interpolate_kernel.hpp b/dpnp/backend/extensions/math/interpolate_kernel.hpp index bf63de08c280..82ce0c19a33d 100644 --- a/dpnp/backend/extensions/math/interpolate_kernel.hpp +++ b/dpnp/backend/extensions/math/interpolate_kernel.hpp @@ -2,10 +2,6 @@ #include #include -#include -#include - -#include #include "utils/type_utils.hpp" @@ -13,16 +9,16 @@ namespace dpnp::extensions::math::kernels { using interpolate_fn_ptr_t = sycl::event (*)(sycl::queue &, - const void *, // x - const void *, // idx - const void *, // xp - const void *, // fp - void *, // out - std::size_t, // n - std::size_t, // xp_size + const void *, // x + const void *, // idx + const void *, // xp + const void *, // fp + void *, // out + std::size_t, // n + std::size_t, // xp_size const std::vector &); -template +template sycl::event interpolate_impl(sycl::queue &q, const void *vx, const void *vidx, @@ -33,40 +29,43 @@ sycl::event interpolate_impl(sycl::queue &q, std::size_t xp_size, const std::vector &depends) { - const T *x = static_cast(vx); + const TCoord *x = static_cast(vx); const std::size_t *idx = static_cast(vidx); - const T *xp = static_cast(vxp); - const T *fp = static_cast(vfp); - T *out = static_cast(vout); + const TCoord *xp = static_cast(vxp); + const TValue *fp = static_cast(vfp); + TValue *out = static_cast(vout); return q.submit([&](sycl::handler &h) { h.depends_on(depends); h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - T left = fp[0]; - T right = fp[xp_size - 1]; + TValue left = fp[0]; + TValue right = fp[xp_size - 1]; + + TCoord x_val = x[i]; std::size_t x_idx = idx[i] - 1; - if (sycl::isnan(x[i])) { - out[i] = x[i]; + if (sycl::isnan(x_val)) { + out[i] = x_val; } else if (x_idx < 0) { out[i] = left; } - else if (x[i] == xp[xp_size - 1]) { + else if (x_val == xp[xp_size - 1]) { out[i] = right; } else if (x_idx >= xp_size - 1) { 
out[i] = right; } - else if (x[i] == xp[x_idx]) { + else if (x_val == xp[x_idx]) { out[i] = fp[x_idx]; } else { - T slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); - T res = slope * (x[i] - xp[x_idx]) + fp[x_idx]; + TValue slope = + (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; if (sycl::isnan(res)) { - res = slope * (x[i] - xp[x_idx + 1]) + fp[x_idx + 1]; + res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { res = fp[x_idx]; } @@ -77,4 +76,87 @@ sycl::event interpolate_impl(sycl::queue &q, }); } +template +sycl::event interpolate_complex_impl(sycl::queue &q, + const void *vx, + const void *vidx, + const void *vxp, + const void *vfp, + void *vout, + std::size_t n, + std::size_t xp_size, + const std::vector &depends) +{ + const TCoord *x = static_cast(vx); + const std::size_t *idx = static_cast(vidx); + const TCoord *xp = static_cast(vxp); + const TValue *fp = static_cast(vfp); + TValue *out = static_cast(vout); + + using realT = typename TValue::value_type; + + return q.submit([&](sycl::handler &h) { + h.depends_on(depends); + h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { + realT left_r = fp[0].real(); + realT right_r = fp[xp_size - 1].real(); + realT left_i = fp[0].imag(); + realT right_i = fp[xp_size - 1].imag(); + + TCoord x_val = x[i]; + std::size_t x_idx = idx[i] - 1; + + realT res_r = 0.0; + realT res_i = 0.0; + + if (sycl::isnan(x_val)) { + res_r = x_val; + res_i = 0.0; + } + else if (x_idx < 0) { + res_r = left_r; + res_i = left_i; + } + else if (x_val == xp[xp_size - 1]) { + res_r = right_r; + res_i = right_i; + } + else if (x_idx >= xp_size - 1) { + res_r = right_r; + res_i = right_i; + } + else if (x_val == xp[x_idx]) { + res_r = fp[x_idx].real(); + res_i = fp[x_idx].imag(); + } + else { + realT dx = xp[x_idx + 1] - xp[x_idx]; + + realT slope_r = (fp[x_idx + 1].real() - fp[x_idx].real()) / dx; + res_r 
= slope_r * (x_val - xp[x_idx]) + fp[x_idx].real(); + if (sycl::isnan(res_r)) { + res_r = slope_r * (x_val - xp[x_idx + 1]) + + fp[x_idx + 1].real(); + if (sycl::isnan(res_r) && + fp[x_idx].real() == fp[x_idx + 1].real()) { + res_r = fp[x_idx].real(); + } + } + + realT slope_i = (fp[x_idx + 1].imag() - fp[x_idx].imag()) / dx; + res_i = slope_i * (x_val - xp[x_idx]) + fp[x_idx].imag(); + if (sycl::isnan(res_i)) { + res_i = slope_i * (x_val - xp[x_idx + 1]) + + fp[x_idx + 1].imag(); + if (sycl::isnan(res_i) && + fp[x_idx].imag() == fp[x_idx + 1].imag()) { + res_i = fp[x_idx].imag(); + } + } + } + out[i] = TValue(res_r, res_i); + }); + }); +} + } // namespace dpnp::extensions::math::kernels diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 72da99ca9e86..802243139de5 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -46,6 +46,7 @@ from dpctl.tensor._numpy_helper import normalize_axis_index import dpnp +import dpnp.backend.extensions.math._math_impl as math_ext # pylint: disable=no-name-in-module import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext @@ -54,8 +55,6 @@ to_supported_dtypes, ) -import dpnp.backend.extensions.math._math_impl as math_ext - from .dpnp_utils import get_usm_allocations from .dpnp_utils.dpnp_utils_reduction import dpnp_wrap_reduction_call from .dpnp_utils.dpnp_utils_statistics import dpnp_cov, dpnp_median @@ -1161,19 +1160,22 @@ def interp(x, xp, fp, left=None, right=None, period=None): dpnp.check_supported_arrays_type(x, xp, fp) if xp.ndim != 1 or fp.ndim != 1: - raise ValueError('xp and fp must be 1D arrays') + raise ValueError("xp and fp must be 1D arrays") if xp.size != fp.size: - raise ValueError('fp and xp are not of the same length') + raise ValueError("fp and xp are not of the same length") if xp.size == 0: - raise ValueError('array of sample points is empty') + raise ValueError("array of sample points is empty") if not x.flags.c_contiguous: - raise 
NotImplementedError('Non-C-contiguous x is currently not ' - 'supported') + raise NotImplementedError( + "Non-C-contiguous x is currently not supported" + ) x_dtype = dpnp.common_type(x, xp) if not dpnp.can_cast(x_dtype, dpnp.default_float_type()): - raise TypeError('Cannot cast array data from' - ' {} to {} according to the rule \'safe\'' - .format(x_dtype, dpnp.default_float_type())) + raise TypeError( + "Cannot cast array data from" + f" {x_dtype} to {dpnp.default_float_type()} " + "according to the rule 'safe'" + ) if period is not None: # The handling of "period" below is modified from NumPy's @@ -1193,20 +1195,20 @@ def interp(x, xp, fp, left=None, right=None, period=None): asort_xp = dpnp.argsort(xp) xp = xp[asort_xp] fp = fp[asort_xp] - xp = dpnp.concatenate((xp[-1:]-period, xp, xp[0:1]+period)) + xp = dpnp.concatenate((xp[-1:] - period, xp, xp[0:1] + period)) fp = dpnp.concatenate((fp[-1:], fp, fp[0:1])) assert xp.flags.c_contiguous assert fp.flags.c_contiguous # NumPy always returns float64 or complex128, so we upcast all values # on the fly in the kernel - out_dtype = x_dtype + out_dtype = fp.dtype output = dpnp.empty(x.shape, dtype=out_dtype) - idx = dpnp.searchsorted(xp, x, side='right') + idx = dpnp.searchsorted(xp, x, side="right") left = fp[0] if left is None else dpnp.array(left, fp.dtype) right = fp[-1] if right is None else dpnp.array(right, fp.dtype) - idx = dpnp.array(idx, dtype='uint64') + idx = dpnp.array(idx, dtype="uint64") queue = x.sycl_queue _manager = dpu.SequentialOrderManager[queue] From e1b86982b19f1422c1d030eaaa01fcb35802935c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 04:33:20 -0700 Subject: [PATCH 04/37] Move interpolate backend to ufunc --- dpnp/CMakeLists.txt | 1 - dpnp/backend/extensions/math/CMakeLists.txt | 86 ------------------- dpnp/backend/extensions/math/math_py.cpp | 37 -------- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../ufunc/elementwise_functions/common.cpp | 2 + 
.../elementwise_functions}/interpolate.cpp | 39 ++++++--- .../elementwise_functions}/interpolate.hpp | 4 +- .../elementwise_functions/interpolate.hpp} | 14 +-- dpnp/dpnp_iface_statistics.py | 4 +- 9 files changed, 36 insertions(+), 152 deletions(-) delete mode 100644 dpnp/backend/extensions/math/CMakeLists.txt delete mode 100644 dpnp/backend/extensions/math/math_py.cpp rename dpnp/backend/extensions/{math => ufunc/elementwise_functions}/interpolate.cpp (76%) rename dpnp/backend/extensions/{math => ufunc/elementwise_functions}/interpolate.hpp (95%) rename dpnp/backend/{extensions/math/interpolate_kernel.hpp => kernels/elementwise_functions/interpolate.hpp} (88%) diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index 6c59141bd1d9..6be90d849dc4 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -60,7 +60,6 @@ add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/fft) add_subdirectory(backend/extensions/indexing) add_subdirectory(backend/extensions/lapack) -add_subdirectory(backend/extensions/math) add_subdirectory(backend/extensions/statistics) add_subdirectory(backend/extensions/ufunc) add_subdirectory(backend/extensions/vm) diff --git a/dpnp/backend/extensions/math/CMakeLists.txt b/dpnp/backend/extensions/math/CMakeLists.txt deleted file mode 100644 index eed898b12496..000000000000 --- a/dpnp/backend/extensions/math/CMakeLists.txt +++ /dev/null @@ -1,86 +0,0 @@ -# ***************************************************************************** -# Copyright (c) 2016-2025, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. 
-# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - - -set(python_module_name _math_impl) -set(_module_src - ${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/math_py.cpp -) - -pybind11_add_module(${python_module_name} MODULE ${_module_src}) -add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) - -if(_dpnp_sycl_targets) - # make fat binary - target_compile_options( - ${python_module_name} - PRIVATE - -fsycl-targets=${_dpnp_sycl_targets} - ) - target_link_options( - ${python_module_name} - PRIVATE - -fsycl-targets=${_dpnp_sycl_targets} - ) -endif() - -if (WIN32) - if (${CMAKE_VERSION} VERSION_LESS "3.27") - # this is a work-around for target_link_options inserting option after -link option, cause - # linker to ignore it. 
- set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") - endif() -endif() - -set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) - -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) - -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - -if (WIN32) - target_compile_options(${python_module_name} PRIVATE - /clang:-fno-approx-func - /clang:-fno-finite-math-only - ) -else() - target_compile_options(${python_module_name} PRIVATE - -fno-approx-func - -fno-finite-math-only - ) -endif() - -target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) - -if (DPNP_GENERATE_COVERAGE) - target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) -endif() - -install(TARGETS ${python_module_name} - DESTINATION "dpnp/backend/extensions/math" -) diff --git a/dpnp/backend/extensions/math/math_py.cpp b/dpnp/backend/extensions/math/math_py.cpp deleted file mode 100644 index 29348d9e437c..000000000000 --- a/dpnp/backend/extensions/math/math_py.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. 
-//***************************************************************************** -// -// This file defines functions of dpnp.backend._math_impl extensions -// -//***************************************************************************** - -#include - -#include "interpolate.hpp" - -PYBIND11_MODULE(_math_impl, m) -{ - dpnp::extensions::math::init_interpolate(m); -} diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index d363910f74df..99163539191c 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -36,6 +36,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/gcd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/heaviside.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/i0.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/interpolate.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/lcm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/ldexp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/logaddexp2.cpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index 8ff89a1b03b6..c6dd3e038eb1 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -36,6 +36,7 @@ #include "gcd.hpp" #include "heaviside.hpp" #include "i0.hpp" +#include "interpolate.hpp" #include "lcm.hpp" #include "ldexp.hpp" #include "logaddexp2.hpp" @@ -64,6 +65,7 @@ void init_elementwise_functions(py::module_ m) init_gcd(m); init_heaviside(m); init_i0(m); + init_interpolate(m); init_lcm(m); init_ldexp(m); init_logaddexp2(m); diff --git a/dpnp/backend/extensions/math/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp similarity index 76% rename from dpnp/backend/extensions/math/interpolate.cpp rename to 
dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index c1549826e77c..4de077b87997 100644 --- a/dpnp/backend/extensions/math/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -32,17 +32,29 @@ #include "dpctl4pybind11.hpp" #include "utils/type_dispatch.hpp" -#include "interpolate.hpp" -#include "interpolate_kernel.hpp" - -namespace dpnp::extensions::math -{ +#include "kernels/elementwise_functions/interpolate.hpp" namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -static kernels::interpolate_fn_ptr_t - interpolate_dispatch_table[td_ns::num_types][td_ns::num_types]; +namespace dpnp::extensions::ufunc +{ + +namespace impl +{ + +typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, + const void *, // x + const void *, // idx + const void *, // xp + const void *, // fp + void *, // out + std::size_t, // n + std::size_t, // xp_size + const std::vector &); + +interpolate_fn_ptr_t interpolate_dispatch_table[td_ns::num_types] + [td_ns::num_types]; std::pair py_interpolate(const dpctl::tensor::usm_ndarray &x, @@ -85,13 +97,14 @@ struct InterpolateFactory if constexpr (std::is_floating_point_v && std::is_floating_point_v) { - return kernels::interpolate_impl; + return dpnp::kernels::interpolate::interpolate_impl; } else if constexpr (std::is_floating_point_v && (std::is_same_v> || std::is_same_v>)) { - return kernels::interpolate_complex_impl; + return dpnp::kernels::interpolate::interpolate_complex_impl; } else { return nullptr; @@ -102,20 +115,22 @@ struct InterpolateFactory void init_interpolate_dispatch_table() { using namespace td_ns; - using kernels::interpolate_fn_ptr_t; DispatchTableBuilder dtb_interpolate; dtb_interpolate.populate_dispatch_table(interpolate_dispatch_table); } +} // namespace impl + void init_interpolate(py::module_ m) { - dpnp::extensions::math::init_interpolate_dispatch_table(); + impl::init_interpolate_dispatch_table(); + using impl::py_interpolate; 
m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), py::arg("xp"), py::arg("fp"), py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } -} // namespace dpnp::extensions::math +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/math/interpolate.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.hpp similarity index 95% rename from dpnp/backend/extensions/math/interpolate.hpp rename to dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.hpp index e5df239aabdf..4ae1cb2c8958 100644 --- a/dpnp/backend/extensions/math/interpolate.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.hpp @@ -29,7 +29,7 @@ namespace py = pybind11; -namespace dpnp::extensions::math +namespace dpnp::extensions::ufunc { void init_interpolate(py::module_ m); -} // namespace dpnp::extensions::math +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/math/interpolate_kernel.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp similarity index 88% rename from dpnp/backend/extensions/math/interpolate_kernel.hpp rename to dpnp/backend/kernels/elementwise_functions/interpolate.hpp index 82ce0c19a33d..f562c355fc37 100644 --- a/dpnp/backend/extensions/math/interpolate_kernel.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -5,19 +5,9 @@ #include "utils/type_utils.hpp" -namespace dpnp::extensions::math::kernels +namespace dpnp::kernels::interpolate { -using interpolate_fn_ptr_t = sycl::event (*)(sycl::queue &, - const void *, // x - const void *, // idx - const void *, // xp - const void *, // fp - void *, // out - std::size_t, // n - std::size_t, // xp_size - const std::vector &); - template sycl::event interpolate_impl(sycl::queue &q, const void *vx, @@ -159,4 +149,4 @@ sycl::event interpolate_complex_impl(sycl::queue &q, }); } -} // namespace dpnp::extensions::math::kernels +} // namespace dpnp::kernels::interpolate diff 
--git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 802243139de5..0f52a31b730f 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -46,10 +46,10 @@ from dpctl.tensor._numpy_helper import normalize_axis_index import dpnp -import dpnp.backend.extensions.math._math_impl as math_ext # pylint: disable=no-name-in-module import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext +import dpnp.backend.extensions.ufunc._ufunc_impl as ufi from dpnp.dpnp_utils.dpnp_utils_common import ( result_type_for_device, to_supported_dtypes, @@ -1212,7 +1212,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): queue = x.sycl_queue _manager = dpu.SequentialOrderManager[queue] - mem_ev, ht_ev = math_ext._interpolate( + mem_ev, ht_ev = ufi._interpolate( x.get_array(), idx.get_array(), xp.get_array(), From 00374553f20fc445db8841483445ba44395fc4cb Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 04:35:20 -0700 Subject: [PATCH 05/37] Move def interp()to dpnp_iface_mathematical --- dpnp/dpnp_iface_mathematical.py | 178 +++++++++++++++++++++++++++++++- dpnp/dpnp_iface_statistics.py | 177 ------------------------------- 2 files changed, 177 insertions(+), 178 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 9f9d5dcca082..551e28077f55 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -111,8 +111,9 @@ "gcd", "gradient", "heaviside", - "imag", "i0", + "imag", + "interp", "lcm", "ldexp", "maximum", @@ -2689,6 +2690,181 @@ def gradient(f, *varargs, axis=None, edge_order=1): ) +def interp(x, xp, fp, left=None, right=None, period=None): + """ + One-dimensional linear interpolation for monotonically increasing sample points. + + Returns the one-dimensional piecewise linear interpolant to a function + with given discrete data points (`xp`, `fp`), evaluated at `x`. 
+ + Parameters + ---------- + x : array_like + The x-coordinates at which to evaluate the interpolated values. + + xp : 1-D sequence of floats + The x-coordinates of the data points, must be increasing if argument + `period` is not specified. Otherwise, `xp` is internally sorted after + normalizing the periodic boundaries with ``xp = xp % period``. + + fp : 1-D sequence of float or complex + The y-coordinates of the data points, same length as `xp`. + + left : optional float or complex corresponding to fp + Value to return for `x < xp[0]`, default is `fp[0]`. + + right : optional float or complex corresponding to fp + Value to return for `x > xp[-1]`, default is `fp[-1]`. + + period : None or float, optional + A period for the x-coordinates. This parameter allows the proper + interpolation of angular x-coordinates. Parameters `left` and `right` + are ignored if `period` is specified. + + Returns + ------- + y : float or complex (corresponding to fp) or ndarray + The interpolated values, same shape as `x`. + + Raises + ------ + ValueError + If `xp` and `fp` have different length + If `xp` or `fp` are not 1-D sequences + If `period == 0` + + See Also + -------- + scipy.interpolate + + Warnings + -------- + The x-coordinate sequence is expected to be increasing, but this is not + explicitly enforced. However, if the sequence `xp` is non-increasing, + interpolation results are meaningless. + + Note that, since NaN is unsortable, `xp` also cannot contain NaNs. + + A simple check for `xp` being strictly increasing is:: + + np.all(np.diff(xp) > 0) + + Examples + -------- + >>> import dpnp as np + >>> xp = np.array([1, 2, 3]) + >>> fp = np.array([3 ,2 ,0]) + >>> x = np.array([2.5]) + >>> np.interp(2.5, xp, fp) + 1.0 + >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) + array([3. , 3. , 2.5 , 0.56, 0. 
]) + >>> UNDEF = -99.0 + >>> np.interp(3.14, xp, fp, right=UNDEF) + -99.0 + + Plot an interpolant to the sine function: + + >>> x = np.linspace(0, 2*np.pi, 10) + >>> y = np.sin(x) + >>> xvals = np.linspace(0, 2*np.pi, 50) + >>> yinterp = np.interp(xvals, x, y) + >>> import matplotlib.pyplot as plt + >>> plt.plot(x, y, 'o') + [] + >>> plt.plot(xvals, yinterp, '-x') + [] + >>> plt.show() + + Interpolation with periodic x-coordinates: + + >>> x = [-180, -170, -185, 185, -10, -5, 0, 365] + >>> xp = [190, -190, 350, -350] + >>> fp = [5, 10, 3, 4] + >>> np.interp(x, xp, fp, period=360) + array([7.5 , 5. , 8.75, 6.25, 3. , 3.25, 3.5 , 3.75]) + + Complex interpolation: + + >>> x = [1.5, 4.0] + >>> xp = [2,3,5] + >>> fp = [1.0j, 0, 2+3j] + >>> np.interp(x, xp, fp) + array([0.+1.j , 1.+1.5j]) + + """ + + dpnp.check_supported_arrays_type(x, xp, fp) + + if xp.ndim != 1 or fp.ndim != 1: + raise ValueError("xp and fp must be 1D arrays") + if xp.size != fp.size: + raise ValueError("fp and xp are not of the same length") + if xp.size == 0: + raise ValueError("array of sample points is empty") + if not x.flags.c_contiguous: + raise NotImplementedError( + "Non-C-contiguous x is currently not supported" + ) + x_dtype = dpnp.common_type(x, xp) + if not dpnp.can_cast(x_dtype, dpnp.default_float_type()): + raise TypeError( + "Cannot cast array data from" + f" {x_dtype} to {dpnp.default_float_type()} " + "according to the rule 'safe'" + ) + + if period is not None: + # The handling of "period" below is modified from NumPy's + + if period == 0: + raise ValueError("period must be a non-zero value") + period = dpnp.abs(period) + left = None + right = None + + x = x.astype(dpnp.default_float_type()) + xp = xp.astype(dpnp.default_float_type()) + + # normalizing periodic boundaries + x %= period + xp %= period + asort_xp = dpnp.argsort(xp) + xp = xp[asort_xp] + fp = fp[asort_xp] + xp = dpnp.concatenate((xp[-1:] - period, xp, xp[0:1] + period)) + fp = dpnp.concatenate((fp[-1:], fp, fp[0:1])) + 
assert xp.flags.c_contiguous + assert fp.flags.c_contiguous + + # NumPy always returns float64 or complex128, so we upcast all values + # on the fly in the kernel + out_dtype = fp.dtype + output = dpnp.empty(x.shape, dtype=out_dtype) + idx = dpnp.searchsorted(xp, x, side="right") + left = fp[0] if left is None else dpnp.array(left, fp.dtype) + right = fp[-1] if right is None else dpnp.array(right, fp.dtype) + + idx = dpnp.array(idx, dtype="uint64") + + queue = x.sycl_queue + _manager = dpu.SequentialOrderManager[queue] + mem_ev, ht_ev = ufi._interpolate( + x.get_array(), + idx.get_array(), + xp.get_array(), + fp.get_array(), + # left, + # right, + output.get_array(), + queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + return output + + _LCM_DOCSTRING = """ Returns the lowest common multiple of ``|x1|`` and ``|x2|``. diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 0f52a31b730f..3958127789ab 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -49,7 +49,6 @@ # pylint: disable=no-name-in-module import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext -import dpnp.backend.extensions.ufunc._ufunc_impl as ufi from dpnp.dpnp_utils.dpnp_utils_common import ( result_type_for_device, to_supported_dtypes, @@ -67,7 +66,6 @@ "corrcoef", "correlate", "cov", - "interp", "max", "mean", "median", @@ -1053,181 +1051,6 @@ def cov( ) -def interp(x, xp, fp, left=None, right=None, period=None): - """ - One-dimensional linear interpolation for monotonically increasing sample points. - - Returns the one-dimensional piecewise linear interpolant to a function - with given discrete data points (`xp`, `fp`), evaluated at `x`. - - Parameters - ---------- - x : array_like - The x-coordinates at which to evaluate the interpolated values. - - xp : 1-D sequence of floats - The x-coordinates of the data points, must be increasing if argument - `period` is not specified. 
Otherwise, `xp` is internally sorted after - normalizing the periodic boundaries with ``xp = xp % period``. - - fp : 1-D sequence of float or complex - The y-coordinates of the data points, same length as `xp`. - - left : optional float or complex corresponding to fp - Value to return for `x < xp[0]`, default is `fp[0]`. - - right : optional float or complex corresponding to fp - Value to return for `x > xp[-1]`, default is `fp[-1]`. - - period : None or float, optional - A period for the x-coordinates. This parameter allows the proper - interpolation of angular x-coordinates. Parameters `left` and `right` - are ignored if `period` is specified. - - Returns - ------- - y : float or complex (corresponding to fp) or ndarray - The interpolated values, same shape as `x`. - - Raises - ------ - ValueError - If `xp` and `fp` have different length - If `xp` or `fp` are not 1-D sequences - If `period == 0` - - See Also - -------- - scipy.interpolate - - Warnings - -------- - The x-coordinate sequence is expected to be increasing, but this is not - explicitly enforced. However, if the sequence `xp` is non-increasing, - interpolation results are meaningless. - - Note that, since NaN is unsortable, `xp` also cannot contain NaNs. - - A simple check for `xp` being strictly increasing is:: - - np.all(np.diff(xp) > 0) - - Examples - -------- - >>> import dpnp as np - >>> xp = np.array([1, 2, 3]) - >>> fp = np.array([3 ,2 ,0]) - >>> x = np.array([2.5]) - >>> np.interp(2.5, xp, fp) - 1.0 - >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) - array([3. , 3. , 2.5 , 0.56, 0. 
]) - >>> UNDEF = -99.0 - >>> np.interp(3.14, xp, fp, right=UNDEF) - -99.0 - - Plot an interpolant to the sine function: - - >>> x = np.linspace(0, 2*np.pi, 10) - >>> y = np.sin(x) - >>> xvals = np.linspace(0, 2*np.pi, 50) - >>> yinterp = np.interp(xvals, x, y) - >>> import matplotlib.pyplot as plt - >>> plt.plot(x, y, 'o') - [] - >>> plt.plot(xvals, yinterp, '-x') - [] - >>> plt.show() - - Interpolation with periodic x-coordinates: - - >>> x = [-180, -170, -185, 185, -10, -5, 0, 365] - >>> xp = [190, -190, 350, -350] - >>> fp = [5, 10, 3, 4] - >>> np.interp(x, xp, fp, period=360) - array([7.5 , 5. , 8.75, 6.25, 3. , 3.25, 3.5 , 3.75]) - - Complex interpolation: - - >>> x = [1.5, 4.0] - >>> xp = [2,3,5] - >>> fp = [1.0j, 0, 2+3j] - >>> np.interp(x, xp, fp) - array([0.+1.j , 1.+1.5j]) - - """ - - dpnp.check_supported_arrays_type(x, xp, fp) - - if xp.ndim != 1 or fp.ndim != 1: - raise ValueError("xp and fp must be 1D arrays") - if xp.size != fp.size: - raise ValueError("fp and xp are not of the same length") - if xp.size == 0: - raise ValueError("array of sample points is empty") - if not x.flags.c_contiguous: - raise NotImplementedError( - "Non-C-contiguous x is currently not supported" - ) - x_dtype = dpnp.common_type(x, xp) - if not dpnp.can_cast(x_dtype, dpnp.default_float_type()): - raise TypeError( - "Cannot cast array data from" - f" {x_dtype} to {dpnp.default_float_type()} " - "according to the rule 'safe'" - ) - - if period is not None: - # The handling of "period" below is modified from NumPy's - - if period == 0: - raise ValueError("period must be a non-zero value") - period = dpnp.abs(period) - left = None - right = None - - x = x.astype(dpnp.default_float_type()) - xp = xp.astype(dpnp.default_float_type()) - - # normalizing periodic boundaries - x %= period - xp %= period - asort_xp = dpnp.argsort(xp) - xp = xp[asort_xp] - fp = fp[asort_xp] - xp = dpnp.concatenate((xp[-1:] - period, xp, xp[0:1] + period)) - fp = dpnp.concatenate((fp[-1:], fp, fp[0:1])) - 
assert xp.flags.c_contiguous - assert fp.flags.c_contiguous - - # NumPy always returns float64 or complex128, so we upcast all values - # on the fly in the kernel - out_dtype = fp.dtype - output = dpnp.empty(x.shape, dtype=out_dtype) - idx = dpnp.searchsorted(xp, x, side="right") - left = fp[0] if left is None else dpnp.array(left, fp.dtype) - right = fp[-1] if right is None else dpnp.array(right, fp.dtype) - - idx = dpnp.array(idx, dtype="uint64") - - queue = x.sycl_queue - _manager = dpu.SequentialOrderManager[queue] - mem_ev, ht_ev = ufi._interpolate( - x.get_array(), - idx.get_array(), - xp.get_array(), - fp.get_array(), - # left, - # right, - output.get_array(), - queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(mem_ev, ht_ev) - - return output - - def max(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the maximum of an array or maximum along an axis. From 7866eb85bc342edaa2a7ca7168732e7f7073e1ca Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 03:52:18 -0700 Subject: [PATCH 06/37] Use dispatch vector and remove interpolate_complex_impl --- .../elementwise_functions/interpolate.cpp | 91 +++++++++--- .../elementwise_functions/interpolate.hpp | 134 +++++------------- 2 files changed, 107 insertions(+), 118 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 4de077b87997..b2573d26f5b9 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -23,6 +23,7 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** +#include #include #include @@ -43,6 +44,21 @@ namespace dpnp::extensions::ufunc namespace impl { +template +struct value_type_of +{ + using type = T; +}; + +template +struct value_type_of> +{ + using type = T; +}; + +template +using value_type_of_t = typename value_type_of::type; + typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const void *, // x const void *, // idx @@ -53,8 +69,34 @@ typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, std::size_t, // xp_size const std::vector &); -interpolate_fn_ptr_t interpolate_dispatch_table[td_ns::num_types] - [td_ns::num_types]; +template +sycl::event interpolate_call(sycl::queue &exec_q, + const void *vx, + const void *vidx, + const void *vxp, + const void *vfp, + void *vout, + std::size_t n, + std::size_t xp_size, + const std::vector &depends) +{ + using dpctl::tensor::type_utils::is_complex_v; + using TCoord = std::conditional_t, value_type_of_t, T>; + + const TCoord *x = static_cast(vx); + const std::size_t *idx = static_cast(vidx); + const TCoord *xp = static_cast(vxp); + const T *fp = static_cast(vfp); + T *out = static_cast(vout); + + using dpnp::kernels::interpolate::interpolate_impl; + sycl::event interpolate_ev = interpolate_impl( + exec_q, x, idx, xp, fp, out, n, xp_size, depends); + + return interpolate_ev; +} + +interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; std::pair py_interpolate(const dpctl::tensor::usm_ndarray &x, @@ -72,7 +114,7 @@ std::pair int xp_type_id = array_types.typenum_to_lookup_id(xp_typenum); int fp_type_id = array_types.typenum_to_lookup_id(fp_typenum); - auto fn = interpolate_dispatch_table[xp_type_id][fp_type_id]; + auto fn = interpolate_dispatch_vector[fp_type_id]; if (!fn) { throw py::type_error("Unsupported dtype."); } @@ -89,43 +131,54 @@ std::pair return std::make_pair(keep, ev); } -template +/** + * @brief A factory to define pairs of supported types for which + * 
interpolate function is available. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct InterpolateOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template struct InterpolateFactory { fnT get() { - if constexpr (std::is_floating_point_v && - std::is_floating_point_v) - { - return dpnp::kernels::interpolate::interpolate_impl; - } - else if constexpr (std::is_floating_point_v && - (std::is_same_v> || - std::is_same_v>)) + if constexpr (std::is_same_v< + typename InterpolateOutputType::value_type, void>) { - return dpnp::kernels::interpolate::interpolate_complex_impl; + return nullptr; } else { - return nullptr; + return interpolate_call; } } }; -void init_interpolate_dispatch_table() +void init_interpolate_dispatch_vectors() { using namespace td_ns; - DispatchTableBuilder + DispatchVectorBuilder dtb_interpolate; - dtb_interpolate.populate_dispatch_table(interpolate_dispatch_table); + dtb_interpolate.populate_dispatch_vector(interpolate_dispatch_vector); } } // namespace impl void init_interpolate(py::module_ m) { - impl::init_interpolate_dispatch_table(); + impl::init_interpolate_dispatch_vectors(); using impl::py_interpolate; m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index f562c355fc37..17497f98c4f4 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -5,26 +5,44 @@ #include "utils/type_utils.hpp" +namespace type_utils = dpctl::tensor::type_utils; + namespace dpnp::kernels::interpolate { +template +struct IsNan +{ + static bool isnan(const T &v) + { + if constexpr 
(type_utils::is_complex_v) { + using vT = typename T::value_type; + + const vT real1 = std::real(v); + const vT imag1 = std::imag(v); + + return IsNan::isnan(real1) || IsNan::isnan(imag1); + } + else if constexpr (std::is_floating_point_v || + std::is_same_v) { + return sycl::isnan(v); + } + + return false; + } +}; + template sycl::event interpolate_impl(sycl::queue &q, - const void *vx, - const void *vidx, - const void *vxp, - const void *vfp, - void *vout, - std::size_t n, - std::size_t xp_size, + const TCoord *x, + const std::size_t *idx, + const TCoord *xp, + const TValue *fp, + TValue *out, + const std::size_t n, + const std::size_t xp_size, const std::vector &depends) { - const TCoord *x = static_cast(vx); - const std::size_t *idx = static_cast(vidx); - const TCoord *xp = static_cast(vxp); - const TValue *fp = static_cast(vfp); - TValue *out = static_cast(vout); - return q.submit([&](sycl::handler &h) { h.depends_on(depends); h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { @@ -34,7 +52,7 @@ sycl::event interpolate_impl(sycl::queue &q, TCoord x_val = x[i]; std::size_t x_idx = idx[i] - 1; - if (sycl::isnan(x_val)) { + if (IsNan::isnan(x_val)) { out[i] = x_val; } else if (x_idx < 0) { @@ -54,9 +72,10 @@ sycl::event interpolate_impl(sycl::queue &q, (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; - if (sycl::isnan(res)) { + if (IsNan::isnan(res)) { res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; - if (sycl::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + if (IsNan::isnan(res) && + (fp[x_idx] == fp[x_idx + 1])) { res = fp[x_idx]; } } @@ -66,87 +85,4 @@ sycl::event interpolate_impl(sycl::queue &q, }); } -template -sycl::event interpolate_complex_impl(sycl::queue &q, - const void *vx, - const void *vidx, - const void *vxp, - const void *vfp, - void *vout, - std::size_t n, - std::size_t xp_size, - const std::vector &depends) -{ - const TCoord *x = static_cast(vx); - const std::size_t *idx = 
static_cast(vidx); - const TCoord *xp = static_cast(vxp); - const TValue *fp = static_cast(vfp); - TValue *out = static_cast(vout); - - using realT = typename TValue::value_type; - - return q.submit([&](sycl::handler &h) { - h.depends_on(depends); - h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - realT left_r = fp[0].real(); - realT right_r = fp[xp_size - 1].real(); - realT left_i = fp[0].imag(); - realT right_i = fp[xp_size - 1].imag(); - - TCoord x_val = x[i]; - std::size_t x_idx = idx[i] - 1; - - realT res_r = 0.0; - realT res_i = 0.0; - - if (sycl::isnan(x_val)) { - res_r = x_val; - res_i = 0.0; - } - else if (x_idx < 0) { - res_r = left_r; - res_i = left_i; - } - else if (x_val == xp[xp_size - 1]) { - res_r = right_r; - res_i = right_i; - } - else if (x_idx >= xp_size - 1) { - res_r = right_r; - res_i = right_i; - } - else if (x_val == xp[x_idx]) { - res_r = fp[x_idx].real(); - res_i = fp[x_idx].imag(); - } - else { - realT dx = xp[x_idx + 1] - xp[x_idx]; - - realT slope_r = (fp[x_idx + 1].real() - fp[x_idx].real()) / dx; - res_r = slope_r * (x_val - xp[x_idx]) + fp[x_idx].real(); - if (sycl::isnan(res_r)) { - res_r = slope_r * (x_val - xp[x_idx + 1]) + - fp[x_idx + 1].real(); - if (sycl::isnan(res_r) && - fp[x_idx].real() == fp[x_idx + 1].real()) { - res_r = fp[x_idx].real(); - } - } - - realT slope_i = (fp[x_idx + 1].imag() - fp[x_idx].imag()) / dx; - res_i = slope_i * (x_val - xp[x_idx]) + fp[x_idx].imag(); - if (sycl::isnan(res_i)) { - res_i = slope_i * (x_val - xp[x_idx + 1]) + - fp[x_idx + 1].imag(); - if (sycl::isnan(res_i) && - fp[x_idx].imag() == fp[x_idx + 1].imag()) { - res_i = fp[x_idx].imag(); - } - } - } - out[i] = TValue(res_r, res_i); - }); - }); -} - } // namespace dpnp::kernels::interpolate From 51b3bde065f47aab6ec47fb1ab1245348ceea1eb Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 2 Apr 2025 06:59:02 -0700 Subject: [PATCH 07/37] Add more backend checks --- .../elementwise_functions/interpolate.cpp | 37 
++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index b2573d26f5b9..cd6380babe2d 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -26,11 +26,12 @@ #include #include +#include "dpctl4pybind11.hpp" #include #include // dpctl tensor headers -#include "dpctl4pybind11.hpp" +#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "kernels/elementwise_functions/interpolate.hpp" @@ -107,16 +108,48 @@ std::pair sycl::queue &exec_q, const std::vector &depends) { + int x_typenum = x.get_typenum(); int xp_typenum = xp.get_typenum(); int fp_typenum = fp.get_typenum(); + int out_typenum = out.get_typenum(); auto array_types = td_ns::usm_ndarray_types(); + int x_type_id = array_types.typenum_to_lookup_id(x_typenum); int xp_type_id = array_types.typenum_to_lookup_id(xp_typenum); int fp_type_id = array_types.typenum_to_lookup_id(fp_typenum); + int out_type_id = array_types.typenum_to_lookup_id(out_typenum); + + if (x_type_id != xp_type_id) { + throw py::value_error("x and xp must have the same dtype"); + } + if (fp_type_id != out_type_id) { + throw py::value_error("fp and out must have the same dtype"); + } auto fn = interpolate_dispatch_vector[fp_type_id]; if (!fn) { - throw py::type_error("Unsupported dtype."); + throw py::type_error("Unsupported dtype"); + } + + if (!dpctl::utils::queues_are_compatible(exec_q, {x, idx, xp, fp, out})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(out); + + if (x.get_ndim() != 1 || xp.get_ndim() != 1 || fp.get_ndim() != 1 || + idx.get_ndim() != 1 || out.get_ndim() != 1) + { + throw py::value_error("All arrays must be 
one-dimensional"); + } + + if (xp.get_size() != fp.get_size()) { + throw py::value_error("xp and fp must have the same size"); + } + + if (x.get_size() != out.get_size() || x.get_size() != idx.get_size()) { + throw py::value_error("x, idx, and out must have the same size"); } std::size_t n = x.get_size(); From ecfa37dfafd493e2d92b6ee6f603241f7277df8d Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 10 Apr 2025 04:16:42 -0700 Subject: [PATCH 08/37] Add support left/right args --- .../elementwise_functions/interpolate.cpp | 44 ++++++++++++++++--- .../elementwise_functions/interpolate.hpp | 12 ++--- dpnp/dpnp_iface_mathematical.py | 12 +++-- 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index cd6380babe2d..8190ffcd929c 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -65,6 +65,8 @@ typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const void *, // idx const void *, // xp const void *, // fp + const void *, // left + const void *, // right void *, // out std::size_t, // n std::size_t, // xp_size @@ -76,6 +78,8 @@ sycl::event interpolate_call(sycl::queue &exec_q, const void *vidx, const void *vxp, const void *vfp, + const void *vleft, + const void *vright, void *vout, std::size_t n, std::size_t xp_size, @@ -88,11 +92,13 @@ sycl::event interpolate_call(sycl::queue &exec_q, const std::size_t *idx = static_cast(vidx); const TCoord *xp = static_cast(vxp); const T *fp = static_cast(vfp); + const T *left = static_cast(vleft); + const T *right = static_cast(vright); T *out = static_cast(vout); using dpnp::kernels::interpolate::interpolate_impl; sycl::event interpolate_ev = interpolate_impl( - exec_q, x, idx, xp, fp, out, n, xp_size, depends); + exec_q, x, idx, xp, fp, left, right, out, n, 
xp_size, depends); return interpolate_ev; } @@ -104,6 +110,8 @@ std::pair const dpctl::tensor::usm_ndarray &idx, const dpctl::tensor::usm_ndarray &xp, const dpctl::tensor::usm_ndarray &fp, + std::optional &left, + std::optional &right, dpctl::tensor::usm_ndarray &out, sycl::queue &exec_q, const std::vector &depends) @@ -155,13 +163,34 @@ std::pair std::size_t n = x.get_size(); std::size_t xp_size = xp.get_size(); - sycl::event ev = fn(exec_q, x.get_data(), idx.get_data(), xp.get_data(), - fp.get_data(), out.get_data(), n, xp_size, depends); + void *left_ptr = left.has_value() ? left.value().get_data() : nullptr; - sycl::event keep = - dpctl::utils::keep_args_alive(exec_q, {x, idx, xp, fp, out}, {ev}); + void *right_ptr = right.has_value() ? right.value().get_data() : nullptr; - return std::make_pair(keep, ev); + sycl::event ev = + fn(exec_q, x.get_data(), idx.get_data(), xp.get_data(), fp.get_data(), + left_ptr, right_ptr, out.get_data(), n, xp_size, depends); + + sycl::event args_ev; + + if (left.has_value() && right.has_value()) { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {x, idx, xp, fp, out, left.value(), right.value()}, {ev}); + } + else if (left.has_value()) { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {x, idx, xp, fp, out, left.value()}, {ev}); + } + else if (right.has_value()) { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {x, idx, xp, fp, out, right.value()}, {ev}); + } + else { + args_ev = + dpctl::utils::keep_args_alive(exec_q, {x, idx, xp, fp, out}, {ev}); + } + + return std::make_pair(args_ev, ev); } /** @@ -215,7 +244,8 @@ void init_interpolate(py::module_ m) using impl::py_interpolate; m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), - py::arg("xp"), py::arg("fp"), py::arg("out"), py::arg("sycl_queue"), + py::arg("xp"), py::arg("fp"), py::arg("left"), py::arg("right"), + py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } diff --git 
a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index 17497f98c4f4..7eb974515f0e 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -38,6 +38,8 @@ sycl::event interpolate_impl(sycl::queue &q, const std::size_t *idx, const TCoord *xp, const TValue *fp, + const TValue *left, + const TValue *right, TValue *out, const std::size_t n, const std::size_t xp_size, @@ -46,8 +48,8 @@ sycl::event interpolate_impl(sycl::queue &q, return q.submit([&](sycl::handler &h) { h.depends_on(depends); h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - TValue left = fp[0]; - TValue right = fp[xp_size - 1]; + TValue left_val = left ? *left : fp[0]; + TValue right_val = right ? *right : fp[xp_size - 1]; TCoord x_val = x[i]; std::size_t x_idx = idx[i] - 1; @@ -56,13 +58,13 @@ sycl::event interpolate_impl(sycl::queue &q, out[i] = x_val; } else if (x_idx < 0) { - out[i] = left; + out[i] = left_val; } else if (x_val == xp[xp_size - 1]) { - out[i] = right; + out[i] = right_val; } else if (x_idx >= xp_size - 1) { - out[i] = right; + out[i] = right_val; } else if (x_val == xp[x_idx]) { out[i] = fp[x_idx]; diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 551e28077f55..c72be3c1dd5b 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2842,8 +2842,12 @@ def interp(x, xp, fp, left=None, right=None, period=None): out_dtype = fp.dtype output = dpnp.empty(x.shape, dtype=out_dtype) idx = dpnp.searchsorted(xp, x, side="right") - left = fp[0] if left is None else dpnp.array(left, fp.dtype) - right = fp[-1] if right is None else dpnp.array(right, fp.dtype) + left_usm = ( + dpnp.array(left, fp.dtype).get_array() if left is not None else None + ) + right_usm = ( + dpnp.array(right, fp.dtype).get_array() if right is not None else None + ) idx = dpnp.array(idx, dtype="uint64") 
@@ -2854,8 +2858,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): idx.get_array(), xp.get_array(), fp.get_array(), - # left, - # right, + left_usm, + right_usm, output.get_array(), queue, depends=_manager.submitted_events, From 5d53f9c1cdb8469961edd45e18db2199fc80c3a6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 10 Apr 2025 04:25:27 -0700 Subject: [PATCH 09/37] Use get_usm_allocations in def interp --- dpnp/dpnp_iface_mathematical.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index c72be3c1dd5b..b5f0c16f3118 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2806,11 +2806,15 @@ def interp(x, xp, fp, left=None, right=None, period=None): raise NotImplementedError( "Non-C-contiguous x is currently not supported" ) + _, exec_q = get_usm_allocations([x, xp, fp]) + x_dtype = dpnp.common_type(x, xp) - if not dpnp.can_cast(x_dtype, dpnp.default_float_type()): + x_float_type = dpnp.default_float_type(exec_q) + + if not dpnp.can_cast(x_dtype, x_float_type): raise TypeError( "Cannot cast array data from" - f" {x_dtype} to {dpnp.default_float_type()} " + f" {x_dtype} to {x_float_type} " "according to the rule 'safe'" ) @@ -2823,8 +2827,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): left = None right = None - x = x.astype(dpnp.default_float_type()) - xp = xp.astype(dpnp.default_float_type()) + x = x.astype(x_float_type) + xp = xp.astype(x_float_type) # normalizing periodic boundaries x %= period @@ -2849,10 +2853,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): dpnp.array(right, fp.dtype).get_array() if right is not None else None ) - idx = dpnp.array(idx, dtype="uint64") - - queue = x.sycl_queue - _manager = dpu.SequentialOrderManager[queue] + _manager = dpu.SequentialOrderManager[exec_q] mem_ev, ht_ev = ufi._interpolate( x.get_array(), idx.get_array(), @@ -2861,7 
+2862,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): left_usm, right_usm, output.get_array(), - queue, + exec_q, depends=_manager.submitted_events, ) _manager.add_event_pair(mem_ev, ht_ev) From 9dbc2c5d5e3736f32463b857b91d53907e45330f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 03:57:36 -0700 Subject: [PATCH 10/37] Pass idx as std::int64_t --- .../ufunc/elementwise_functions/interpolate.cpp | 2 +- .../kernels/elementwise_functions/interpolate.hpp | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 8190ffcd929c..7e6cf7b4613e 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -89,7 +89,7 @@ sycl::event interpolate_call(sycl::queue &exec_q, using TCoord = std::conditional_t, value_type_of_t, T>; const TCoord *x = static_cast(vx); - const std::size_t *idx = static_cast(vidx); + const std::int64_t *idx = static_cast(vidx); const TCoord *xp = static_cast(vxp); const T *fp = static_cast(vfp); const T *left = static_cast(vleft); diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index 7eb974515f0e..1c12f051cad4 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -35,7 +35,7 @@ struct IsNan template sycl::event interpolate_impl(sycl::queue &q, const TCoord *x, - const std::size_t *idx, + const std::int64_t *idx, const TCoord *xp, const TValue *fp, const TValue *left, @@ -52,7 +52,7 @@ sycl::event interpolate_impl(sycl::queue &q, TValue right_val = right ? 
*right : fp[xp_size - 1]; TCoord x_val = x[i]; - std::size_t x_idx = idx[i] - 1; + std::int64_t x_idx = idx[i] - 1; if (IsNan::isnan(x_val)) { out[i] = x_val; @@ -63,12 +63,9 @@ sycl::event interpolate_impl(sycl::queue &q, else if (x_val == xp[xp_size - 1]) { out[i] = right_val; } - else if (x_idx >= xp_size - 1) { + else if (x_idx >= static_cast(xp_size - 1)) { out[i] = right_val; } - else if (x_val == xp[x_idx]) { - out[i] = fp[x_idx]; - } else { TValue slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); From 1bafd7c614dc0290216ad29f4631c2f7b70565b9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 04:01:09 -0700 Subject: [PATCH 11/37] Add proper casting input array --- dpnp/dpnp_iface_mathematical.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index b5f0c16f3118..ad17316c25a3 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2818,6 +2818,13 @@ def interp(x, xp, fp, left=None, right=None, period=None): "according to the rule 'safe'" ) + x = dpnp.asarray(x, dtype=x_float_type, order="C") + xp = dpnp.asarray(xp, dtype=x_float_type, order="C") + + out_dtype = dpnp.common_type(x, xp, fp) + + fp = dpnp.asarray(fp, dtype=out_dtype, order="C") + if period is not None: # The handling of "period" below is modified from NumPy's @@ -2841,9 +2848,6 @@ def interp(x, xp, fp, left=None, right=None, period=None): assert xp.flags.c_contiguous assert fp.flags.c_contiguous - # NumPy always returns float64 or complex128, so we upcast all values - # on the fly in the kernel - out_dtype = fp.dtype output = dpnp.empty(x.shape, dtype=out_dtype) idx = dpnp.searchsorted(xp, x, side="right") left_usm = ( From 2f43fd78c7c881fc361238a04dc5006100bb5f01 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 05:43:37 -0700 Subject: [PATCH 12/37] Update def interp to support period args --- 
dpnp/dpnp_iface_mathematical.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index ad17316c25a3..1fc2866c7f9f 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2806,7 +2806,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): raise NotImplementedError( "Non-C-contiguous x is currently not supported" ) - _, exec_q = get_usm_allocations([x, xp, fp]) + usm_type, exec_q = get_usm_allocations([x, xp, fp]) x_dtype = dpnp.common_type(x, xp) x_float_type = dpnp.default_float_type(exec_q) @@ -2828,6 +2828,15 @@ def interp(x, xp, fp, left=None, right=None, period=None): if period is not None: # The handling of "period" below is modified from NumPy's + if dpnp.is_supported_array_type(period): + if dpu.get_execution_queue([exec_q, period.sycl_queue]) is None: + raise ValueError( + "input arrays and period must be allocated " + "on the same SYCL queue" + ) + else: + period = dpnp.asarray(period, sycl_queue=exec_q, usm_type=usm_type) + if period == 0: raise ValueError("period must be a non-zero value") period = dpnp.abs(period) From ae65091bd5284e7b2b121a56557fcc141ef6163d Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 06:05:02 -0700 Subject: [PATCH 13/37] Return fp[-1] instead of right_val for x==xp[-1] --- dpnp/backend/kernels/elementwise_functions/interpolate.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index 1c12f051cad4..fd9e64d5a75a 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -61,7 +61,7 @@ sycl::event interpolate_impl(sycl::queue &q, out[i] = left_val; } else if (x_val == xp[xp_size - 1]) { - out[i] = right_val; + out[i] = fp[xp_size - 1]; } else if (x_idx >= 
static_cast(xp_size - 1)) { out[i] = right_val; From 771d3ebb6e337bdc801a5ec900bf22fedb57e3bc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 07:36:07 -0700 Subject: [PATCH 14/37] Unskip cupy tests for interp --- dpnp/tests/third_party/cupy/math_tests/test_misc.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_misc.py b/dpnp/tests/third_party/cupy/math_tests/test_misc.py index 7746c56d3253..aeebd90142a0 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_misc.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_misc.py @@ -367,7 +367,6 @@ def test_real_if_close_with_float_tol_false(self, xp, dtype): assert x.dtype == out.dtype return out - @pytest.mark.skip("interp() is not supported yet") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) @testing.numpy_cupy_allclose(atol=1e-5) @@ -378,7 +377,6 @@ def test_interp(self, xp, dtype_y, dtype_x): fy = xp.sin(fx).astype(dtype_y) return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) @testing.numpy_cupy_allclose(atol=1e-5) @@ -389,7 +387,6 @@ def test_interp_period(self, xp, dtype_y, dtype_x): fy = xp.sin(fx).astype(dtype_y) return xp.interp(x, fx, fy, period=5) - @pytest.mark.skip("interp() is not supported yet") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) @testing.numpy_cupy_allclose(atol=1e-5) @@ -402,7 +399,6 @@ def test_interp_left_right(self, xp, dtype_y, dtype_x): right = 20 return xp.interp(x, fx, fy, left, right) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_dtypes("efdFD", name="dtype_y") 
@@ -415,7 +411,6 @@ def test_interp_nan_fy(self, xp, dtype_y, dtype_x): fy[0] = fy[2] = fy[-1] = numpy.nan return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") @@ -428,7 +423,6 @@ def test_interp_nan_fx(self, xp, dtype_y, dtype_x): fx[-1] = numpy.nan # x and fx must remain sorted (NaNs are the last) return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") @@ -441,7 +435,6 @@ def test_interp_nan_x(self, xp, dtype_y, dtype_x): x[-1] = numpy.nan # x and fx must remain sorted (NaNs are the last) return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_dtypes("efdFD", name="dtype_y") @@ -454,7 +447,6 @@ def test_interp_inf_fy(self, xp, dtype_y, dtype_x): fy[0] = fy[2] = fy[-1] = numpy.inf return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") @@ -467,7 +459,6 @@ def test_interp_inf_fx(self, xp, dtype_y, dtype_x): fx[-1] = numpy.inf # x and fx must remain sorted return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") @@ -480,7 +471,6 @@ def test_interp_inf_x(self, xp, dtype_y, dtype_x): x[-1] = numpy.inf # x and fx must remain sorted return xp.interp(x, fx, fy) - @pytest.mark.skip("interp() is not supported yet") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", 
no_bool=True) @testing.numpy_cupy_allclose(atol=1e-5) @@ -493,7 +483,6 @@ def test_interp_size1(self, xp, dtype_y, dtype_x): right = 20 return xp.interp(x, fx, fy, left, right) - @pytest.mark.skip("interp() is not supported yet") @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") From 5cda3d2cc07f0b01f0de6d64ac70b959a360b9f7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 07:37:51 -0700 Subject: [PATCH 15/37] Add dpnp tests for interp --- dpnp/tests/test_mathematical.py | 138 ++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index c416a55e5796..91c13a579e0d 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -1143,6 +1143,144 @@ def test_complex(self, xp): assert_raises((ValueError, TypeError), xp.i0, a) +class TestInterp: + @pytest.mark.parametrize( + "dtype_x", get_all_dtypes(no_bool=True, no_complex=True) + ) + @pytest.mark.parametrize("dtype_y", get_all_dtypes(no_bool=True)) + def test_all_dtypes(self, dtype_x, dtype_y): + x = numpy.linspace(0.1, 9.9, 20).astype(dtype_x) + xp = numpy.linspace(0.0, 10.0, 5).astype(dtype_x) + fp = (xp * 1.5 + 1).astype(dtype_y) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp) + result = dpnp.interp(ix, ixp, ifp) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "dtype_x", get_all_dtypes(no_bool=True, no_complex=True) + ) + @pytest.mark.parametrize("dtype_y", get_complex_dtypes()) + def test_complex_fp(self, dtype_x, dtype_y): + x = numpy.array([0.25, 0.75], dtype=dtype_x) + xp = numpy.array([0.0, 1.0], dtype=dtype_x) + fp = numpy.array([1 + 1j, 3 + 3j], dtype=dtype_y) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp) + result = dpnp.interp(ix, ixp, ifp) + 
assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_bool=True, no_complex=True) + ) + def test_left_right_args(self, dtype): + x = numpy.array([-1, 0, 1, 2, 3, 4, 5, 6], dtype=dtype) + xp = numpy.array([0, 3, 6], dtype=dtype) + fp = numpy.array([0, 9, 18], dtype=dtype) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp, left=-40, right=40) + result = dpnp.interp(ix, ixp, ifp, left=-40, right=40) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("val", [numpy.nan, numpy.inf, -numpy.inf]) + def test_naninf(self, val): + x = numpy.array([0, 1, 2, val]) + xp = numpy.array([0, 1, 2]) + fp = numpy.array([10, 20, 30]) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp) + result = dpnp.interp(ix, ixp, ifp) + assert_dtype_allclose(result, expected) + + def test_empty_x(self): + x = numpy.array([]) + xp = numpy.array([0, 1]) + fp = numpy.array([10, 20]) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp) + result = dpnp.interp(ix, ixp, ifp) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_dtypes()) + def test_period(self, dtype): + x = numpy.array([-180, 0, 180], dtype=dtype) + xp = numpy.array([-90, 0, 90], dtype=dtype) + fp = numpy.array([0, 1, 0], dtype=dtype) + + ix = dpnp.array(x) + ixp = dpnp.array(xp) + ifp = dpnp.array(fp) + + expected = numpy.interp(x, xp, fp, period=180) + result = dpnp.interp(ix, ixp, ifp, period=180) + assert_dtype_allclose(result, expected) + + def test_errors(self): + x = dpnp.array([0.5]) + + # xp and fp have different lengths + xp = dpnp.array([0]) + fp = dpnp.array([1, 2]) + assert_raises(ValueError, dpnp.interp, x, xp, fp) + + # xp is not 1D + xp = dpnp.array([[0, 1]]) + fp = dpnp.array([1, 2]) + assert_raises(ValueError, dpnp.interp, x, xp, fp) + + # fp is not 1D + xp 
= dpnp.array([0, 1]) + fp = dpnp.array([[1, 2]]) + assert_raises(ValueError, dpnp.interp, x, xp, fp) + + # xp and fp are empty + xp = dpnp.array([]) + fp = dpnp.array([]) + assert_raises(ValueError, dpnp.interp, x, xp, fp) + + # x complex + x_complex = dpnp.array([1 + 2j]) + xp = dpnp.array([0.0, 2.0]) + fp = dpnp.array([0.0, 1.0]) + assert_raises(TypeError, dpnp.interp, x_complex, xp, fp) + + # period is zero + x = dpnp.array([1.0]) + xp = dpnp.array([0.0, 2.0]) + fp = dpnp.array([0.0, 1.0]) + assert_raises(ValueError, dpnp.interp, x, xp, fp, period=0) + + # period has a different SYCL queue + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + + x = dpnp.array([1.0], sycl_queue=q1) + xp = dpnp.array([0.0, 2.0], sycl_queue=q1) + fp = dpnp.array([0.0, 1.0], sycl_queue=q1) + period = dpnp.array([180], sycl_queue=q2) + assert_raises(ValueError, dpnp.interp, x, xp, fp, period=period) + + @pytest.mark.parametrize( "rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3] ) From a65a1dd92e40599f3db541f5bfbe76977cfea56e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 07:55:44 -0700 Subject: [PATCH 16/37] Update docstrings for def interp() --- dpnp/dpnp_iface_mathematical.py | 96 ++++++++++++++------------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 1fc2866c7f9f..693b4583f33c 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2692,50 +2692,51 @@ def gradient(f, *varargs, axis=None, edge_order=1): def interp(x, xp, fp, left=None, right=None, period=None): """ - One-dimensional linear interpolation for monotonically increasing sample points. + One-dimensional linear interpolation. Returns the one-dimensional piecewise linear interpolant to a function with given discrete data points (`xp`, `fp`), evaluated at `x`. + For full documentation refer to :obj:`numpy.interp`. 
+ Parameters ---------- - x : array_like - The x-coordinates at which to evaluate the interpolated values. + x : {dpnp.ndarray, usm_ndarray} + Input 1-D array. The x-coordinates at which to evaluate + the interpolated values. + + xp : {dpnp.ndarray, usm_ndarray} + Input 1-D array. The x-coordinates of the data points, + must be increasing if argument `period` is not specified. + Otherwise, `xp` is internally sorted after normalizing + the periodic boundaries with ``xp = xp % period``. + + fp : {dpnp.ndarray, usm_ndarray} + Input 1-D array. The y-coordinates of the data points, + same length as `xp`. - xp : 1-D sequence of floats - The x-coordinates of the data points, must be increasing if argument - `period` is not specified. Otherwise, `xp` is internally sorted after - normalizing the periodic boundaries with ``xp = xp % period``. + left : {None, scalar, dpnp.ndarray, usm_ndarray}, optional + Value to return for `x < xp[0]`. - fp : 1-D sequence of float or complex - The y-coordinates of the data points, same length as `xp`. + Default: ``fp[0]``. - left : optional float or complex corresponding to fp - Value to return for `x < xp[0]`, default is `fp[0]`. + right : {None, scalar, dpnp.ndarray, usm_ndarray}, optional + Value to return for `x > xp[-1]`. - right : optional float or complex corresponding to fp - Value to return for `x > xp[-1]`, default is `fp[-1]`. + Default: ``fp[-1]``. - period : None or float, optional + period : {None, scalar, dpnp.ndarray, usm_ndarray}, optional A period for the x-coordinates. This parameter allows the proper interpolation of angular x-coordinates. Parameters `left` and `right` are ignored if `period` is specified. + Default: ``None``. + Returns ------- - y : float or complex (corresponding to fp) or ndarray + y : {dpnp.ndarray, usm_ndarray} The interpolated values, same shape as `x`. 
- Raises - ------ - ValueError - If `xp` and `fp` have different length - If `xp` or `fp` are not 1-D sequences - If `period == 0` - - See Also - -------- - scipy.interpolate Warnings -------- @@ -2747,6 +2748,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): A simple check for `xp` being strictly increasing is:: + import dpnp as np np.all(np.diff(xp) > 0) Examples @@ -2755,40 +2757,29 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> xp = np.array([1, 2, 3]) >>> fp = np.array([3 ,2 ,0]) >>> x = np.array([2.5]) - >>> np.interp(2.5, xp, fp) - 1.0 - >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) + >>> np.interp(x, xp, fp) + array([1.]) + >>> x = np.array([0, 1, 1.5, 2.72, 3.14]) + >>> np.interp(x, xp, fp) array([3. , 3. , 2.5 , 0.56, 0. ]) + >>> x = np.array([3.14]) >>> UNDEF = -99.0 - >>> np.interp(3.14, xp, fp, right=UNDEF) - -99.0 - - Plot an interpolant to the sine function: - - >>> x = np.linspace(0, 2*np.pi, 10) - >>> y = np.sin(x) - >>> xvals = np.linspace(0, 2*np.pi, 50) - >>> yinterp = np.interp(xvals, x, y) - >>> import matplotlib.pyplot as plt - >>> plt.plot(x, y, 'o') - [] - >>> plt.plot(xvals, yinterp, '-x') - [] - >>> plt.show() + >>> np.interp(x, xp, fp, right=UNDEF) + array([-99.]) Interpolation with periodic x-coordinates: - >>> x = [-180, -170, -185, 185, -10, -5, 0, 365] - >>> xp = [190, -190, 350, -350] - >>> fp = [5, 10, 3, 4] + >>> x = np.array([-180, -170, -185, 185, -10, -5, 0, 365]) + >>> xp = np.array([190, -190, 350, -350]) + >>> fp = np.array([5, 10, 3, 4]) >>> np.interp(x, xp, fp, period=360) array([7.5 , 5. , 8.75, 6.25, 3. 
, 3.25, 3.5 , 3.75]) Complex interpolation: - >>> x = [1.5, 4.0] - >>> xp = [2,3,5] - >>> fp = [1.0j, 0, 2+3j] + >>> x = np.array([1.5, 4.0]) + >>> xp = np.array([2,3,5]) + >>> fp = np.array([1.0j, 0, 2+3j]) >>> np.interp(x, xp, fp) array([0.+1.j , 1.+1.5j]) @@ -2802,10 +2793,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): raise ValueError("fp and xp are not of the same length") if xp.size == 0: raise ValueError("array of sample points is empty") - if not x.flags.c_contiguous: - raise NotImplementedError( - "Non-C-contiguous x is currently not supported" - ) + usm_type, exec_q = get_usm_allocations([x, xp, fp]) x_dtype = dpnp.common_type(x, xp) @@ -2826,8 +2814,6 @@ def interp(x, xp, fp, left=None, right=None, period=None): fp = dpnp.asarray(fp, dtype=out_dtype, order="C") if period is not None: - # The handling of "period" below is modified from NumPy's - if dpnp.is_supported_array_type(period): if dpu.get_execution_queue([exec_q, period.sycl_queue]) is None: raise ValueError( From 99cc8b5147e5ddc9415f4aa94309f071c218d130 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 11 Apr 2025 08:53:41 -0700 Subject: [PATCH 17/37] Remove lines after merging --- dpnp/tests/third_party/cupy/test_type_routines.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dpnp/tests/third_party/cupy/test_type_routines.py b/dpnp/tests/third_party/cupy/test_type_routines.py index bf5c7af9ded0..e35b40d90841 100644 --- a/dpnp/tests/third_party/cupy/test_type_routines.py +++ b/dpnp/tests/third_party/cupy/test_type_routines.py @@ -47,10 +47,6 @@ def test_can_cast(self, xp, from_dtype, to_dtype): return ret -<<<<<<< HEAD -======= -# @pytest.mark.skip("dpnp.common_type() is not implemented yet") ->>>>>>> e8871f0d797 (Second impl with dispatch_vector[only floating]) class TestCommonType(unittest.TestCase): @testing.numpy_cupy_equal() From 1263eb503d2e4a001ab9c5e3cf6ee38fffeb11f9 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 14 Apr 2025 
02:45:18 -0700 Subject: [PATCH 18/37] Add type_check flag to cupy tests --- .../third_party/cupy/math_tests/test_misc.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_misc.py b/dpnp/tests/third_party/cupy/math_tests/test_misc.py index aeebd90142a0..4542c51de33e 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_misc.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_misc.py @@ -369,7 +369,7 @@ def test_real_if_close_with_float_tol_false(self, xp, dtype): @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -379,7 +379,7 @@ def test_interp(self, xp, dtype_y, dtype_x): @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_period(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -389,7 +389,7 @@ def test_interp_period(self, xp, dtype_y, dtype_x): @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_left_right(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -402,7 +402,7 @@ def test_interp_left_right(self, xp, dtype_y, dtype_x): 
@testing.with_requires("numpy>=1.17.0") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_nan_fy(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -414,7 +414,7 @@ def test_interp_nan_fy(self, xp, dtype_y, dtype_x): @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_nan_fx(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -426,7 +426,7 @@ def test_interp_nan_fx(self, xp, dtype_y, dtype_x): @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_nan_x(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -438,7 +438,7 @@ def test_interp_nan_x(self, xp, dtype_y, dtype_x): @testing.with_requires("numpy>=1.17.0") @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_inf_fy(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -450,7 +450,7 @@ def test_interp_inf_fy(self, xp, dtype_y, dtype_x): 
@testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_inf_fx(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -462,7 +462,7 @@ def test_interp_inf_fx(self, xp, dtype_y, dtype_x): @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_inf_x(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -473,7 +473,7 @@ def test_interp_inf_x(self, xp, dtype_y, dtype_x): @testing.for_all_dtypes(name="dtype_x", no_bool=True, no_complex=True) @testing.for_all_dtypes(name="dtype_y", no_bool=True) - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_size1(self, xp, dtype_y, dtype_x): # interpolate at points on and outside the boundaries x = xp.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype=dtype_x) @@ -486,7 +486,7 @@ def test_interp_size1(self, xp, dtype_y, dtype_x): @testing.with_requires("numpy>=1.17.0") @testing.for_float_dtypes(name="dtype_x") @testing.for_dtypes("efdFD", name="dtype_y") - @testing.numpy_cupy_allclose(atol=1e-5) + @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_interp_inf_to_nan(self, xp, dtype_y, dtype_x): # from NumPy's test_non_finite_inf x = xp.asarray([0.5], dtype=dtype_x) From b84dd7e065e13688152d353507e2cdc82ba236d1 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 14 Apr 2025 07:14:42 -0700 Subject: [PATCH 19/37] Add 
common_interpolate_checks with common utils --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../elementwise_functions/interpolate.cpp | 110 +++++++++++++----- 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 99163539191c..bbc6881ffcd0 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -70,6 +70,7 @@ endif() set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 7e6cf7b4613e..784cef224548 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -36,9 +36,15 @@ #include "kernels/elementwise_functions/interpolate.hpp" +#include "ext/validation_utils.hpp" + namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +using ext::validation::array_names; +using ext::validation::array_ptr; +using ext::validation::common_checks; + namespace dpnp::extensions::ufunc { @@ -105,27 +111,22 @@ sycl::event interpolate_call(sycl::queue &exec_q, interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; -std::pair - py_interpolate(const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - std::optional &left, - std::optional &right, - 
dpctl::tensor::usm_ndarray &out, - sycl::queue &exec_q, - const std::vector &depends) +void common_interpolate_checks( + const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const dpctl::tensor::usm_ndarray &out, + const std::optional &left, + const std::optional &right) { - int x_typenum = x.get_typenum(); - int xp_typenum = xp.get_typenum(); - int fp_typenum = fp.get_typenum(); - int out_typenum = out.get_typenum(); + array_names names = {{&x, "x"}, {&xp, "xp"}, {&fp, "fp"}, {&out, "out"}}; auto array_types = td_ns::usm_ndarray_types(); - int x_type_id = array_types.typenum_to_lookup_id(x_typenum); - int xp_type_id = array_types.typenum_to_lookup_id(xp_typenum); - int fp_type_id = array_types.typenum_to_lookup_id(fp_typenum); - int out_type_id = array_types.typenum_to_lookup_id(out_typenum); + int x_type_id = array_types.typenum_to_lookup_id(x.get_typenum()); + int xp_type_id = array_types.typenum_to_lookup_id(xp.get_typenum()); + int fp_type_id = array_types.typenum_to_lookup_id(fp.get_typenum()); + int out_type_id = array_types.typenum_to_lookup_id(out.get_typenum()); if (x_type_id != xp_type_id) { throw py::value_error("x and xp must have the same dtype"); @@ -134,17 +135,37 @@ std::pair throw py::value_error("fp and out must have the same dtype"); } - auto fn = interpolate_dispatch_vector[fp_type_id]; - if (!fn) { - throw py::type_error("Unsupported dtype"); + if (left) { + const auto &l = left.value(); + names.insert({&l, "left"}); + if (l.get_ndim() != 0) { + throw py::value_error("left must be a zero-dimensional array"); + } + + int left_type_id = array_types.typenum_to_lookup_id(l.get_typenum()); + if (left_type_id != fp_type_id) { + throw py::value_error( + "left must have the same dtype as fp and out"); + } } - if (!dpctl::utils::queues_are_compatible(exec_q, {x, idx, xp, fp, out})) { - throw py::value_error( - "Execution queue is not compatible 
with allocation queues"); + if (right) { + const auto &r = right.value(); + names.insert({&r, "right"}); + if (r.get_ndim() != 0) { + throw py::value_error("right must be a zero-dimensional array"); + } + + int right_type_id = array_types.typenum_to_lookup_id(r.get_typenum()); + if (right_type_id != fp_type_id) { + throw py::value_error( + "right must have the same dtype as fp and out"); + } } - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(out); + common_checks({&x, &xp, &fp, left ? &left.value() : nullptr, + right ? &right.value() : nullptr}, + {&out}, names); if (x.get_ndim() != 1 || xp.get_ndim() != 1 || fp.get_ndim() != 1 || idx.get_ndim() != 1 || out.get_ndim() != 1) @@ -159,13 +180,40 @@ std::pair if (x.get_size() != out.get_size() || x.get_size() != idx.get_size()) { throw py::value_error("x, idx, and out must have the same size"); } +} + +std::pair + py_interpolate(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + std::optional &left, + std::optional &right, + dpctl::tensor::usm_ndarray &out, + sycl::queue &exec_q, + const std::vector &depends) +{ + if (x.get_size() == 0) { + return {sycl::event(), sycl::event()}; + } + + common_interpolate_checks(x, idx, xp, fp, out, left, right); + + int out_typenum = out.get_typenum(); + + auto array_types = td_ns::usm_ndarray_types(); + int out_type_id = array_types.typenum_to_lookup_id(out_typenum); + + auto fn = interpolate_dispatch_vector[out_type_id]; + if (!fn) { + throw py::type_error("Unsupported dtype"); + } std::size_t n = x.get_size(); std::size_t xp_size = xp.get_size(); - void *left_ptr = left.has_value() ? left.value().get_data() : nullptr; - - void *right_ptr = right.has_value() ? right.value().get_data() : nullptr; + void *left_ptr = left ? left.value().get_data() : nullptr; + void *right_ptr = right ? 
right.value().get_data() : nullptr; sycl::event ev = fn(exec_q, x.get_data(), idx.get_data(), xp.get_data(), fp.get_data(), @@ -173,15 +221,15 @@ std::pair sycl::event args_ev; - if (left.has_value() && right.has_value()) { + if (left && right) { args_ev = dpctl::utils::keep_args_alive( exec_q, {x, idx, xp, fp, out, left.value(), right.value()}, {ev}); } - else if (left.has_value()) { + else if (left) { args_ev = dpctl::utils::keep_args_alive( exec_q, {x, idx, xp, fp, out, left.value()}, {ev}); } - else if (right.has_value()) { + else if (right) { args_ev = dpctl::utils::keep_args_alive( exec_q, {x, idx, xp, fp, out, right.value()}, {ev}); } From e9e357c9d33601288c4c92d52eaade297d38d065 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 14 Apr 2025 07:30:11 -0700 Subject: [PATCH 20/37] Reuse IsNan from common utils --- .../elementwise_functions/interpolate.hpp | 29 ++++--------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index fd9e64d5a75a..cde69266f1b5 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -3,35 +3,15 @@ #include #include +#include "ext/common.hpp" #include "utils/type_utils.hpp" namespace type_utils = dpctl::tensor::type_utils; -namespace dpnp::kernels::interpolate -{ +using ext::common::IsNan; -template -struct IsNan +namespace dpnp::kernels::interpolate { - static bool isnan(const T &v) - { - if constexpr (type_utils::is_complex_v) { - using vT = typename T::value_type; - - const vT real1 = std::real(v); - const vT imag1 = std::imag(v); - - return IsNan::isnan(real1) || IsNan::isnan(imag1); - } - else if constexpr (std::is_floating_point_v || - std::is_same_v) { - return sycl::isnan(v); - } - - return false; - } -}; - template sycl::event interpolate_impl(sycl::queue &q, const TCoord *x, @@ -74,7 +54,8 
@@ sycl::event interpolate_impl(sycl::queue &q, if (IsNan::isnan(res)) { res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; if (IsNan::isnan(res) && - (fp[x_idx] == fp[x_idx + 1])) { + (fp[x_idx] == fp[x_idx + 1])) + { res = fp[x_idx]; } } From 50e451365dd66f0e5be6f96939734451d8a51627 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 14 Apr 2025 07:45:13 -0700 Subject: [PATCH 21/37] Remove duplicate copy --- dpnp/dpnp_iface_mathematical.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 7615bb2cf768..c9fec899f981 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2882,9 +2882,6 @@ def interp(x, xp, fp, left=None, right=None, period=None): left = None right = None - x = x.astype(x_float_type) - xp = xp.astype(x_float_type) - # normalizing periodic boundaries x %= period xp %= period From dbeb3131c52a92a293f1606d502ad3b90a6b446a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 14 Apr 2025 10:58:15 -0700 Subject: [PATCH 22/37] Add _validate_interp_param() function --- dpnp/dpnp_iface_mathematical.py | 50 +++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index c9fec899f981..0e19cfcb6bda 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -349,6 +349,36 @@ def _process_ediff1d_args(arg, arg_name, ary_dtype, ary_sycl_queue, usm_type): return arg, usm_type +def _validate_interp_param(param, name, exec_q, usm_type): + """ + Validate and convert optional parameters for interpolation. + + Returns a USM array or None if the input is None.
+ """ + if param is None: + return None + + if dpnp.is_supported_array_type(param): + if param.ndim != 0: + raise ValueError( + f"a {name} value must be 0-dimensional, " + f"but got {param.ndim}-dim" + ) + if dpu.get_execution_queue([exec_q, param.sycl_queue]) is None: + raise ValueError( + "input arrays and {name} must be on the same SYCL queue" + ) + return param.get_array() + + if dpnp.isscalar(param): + return dpt.asarray(param, sycl_queue=exec_q, usm_type=usm_type) + + raise TypeError( + f"a {name} value must be a scalar or 0-d supported array, " + f"but got {type(param)}" + ) + + _ABS_DOCSTRING = """ Calculates the absolute value for each element :math:`x_i` of input array `x`. @@ -2867,18 +2897,12 @@ def interp(x, xp, fp, left=None, right=None, period=None): fp = dpnp.asarray(fp, dtype=out_dtype, order="C") if period is not None: - if dpnp.is_supported_array_type(period): - if dpu.get_execution_queue([exec_q, period.sycl_queue]) is None: - raise ValueError( - "input arrays and period must be allocated " - "on the same SYCL queue" - ) - else: - period = dpnp.asarray(period, sycl_queue=exec_q, usm_type=usm_type) - + period = _validate_interp_param(period, "period", exec_q, usm_type) if period == 0: raise ValueError("period must be a non-zero value") period = dpnp.abs(period) + + # left/right are ignored when period is specified left = None right = None @@ -2895,12 +2919,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): output = dpnp.empty(x.shape, dtype=out_dtype) idx = dpnp.searchsorted(xp, x, side="right") - left_usm = ( - dpnp.array(left, fp.dtype).get_array() if left is not None else None - ) - right_usm = ( - dpnp.array(right, fp.dtype).get_array() if right is not None else None - ) + left_usm = _validate_interp_param(left, "left", exec_q, usm_type) + right_usm = _validate_interp_param(right, "right", exec_q, usm_type) _manager = dpu.SequentialOrderManager[exec_q] mem_ev, ht_ev = ufi._interpolate( From 
dbb1b55597c26b358372070133db7ad0021b61d7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 15 Apr 2025 02:50:32 -0700 Subject: [PATCH 23/37] Improve code coverage --- dpnp/dpnp_iface_mathematical.py | 12 ++++++++---- dpnp/tests/test_mathematical.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 0e19cfcb6bda..c15876684922 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -349,7 +349,7 @@ def _process_ediff1d_args(arg, arg_name, ary_dtype, ary_sycl_queue, usm_type): return arg, usm_type -def _validate_interp_param(param, name, exec_q, usm_type): +def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): """ Validate and convert optional parameters for interpolation. @@ -371,7 +371,9 @@ def _validate_interp_param(param, name, exec_q, usm_type): return param.get_array() if dpnp.isscalar(param): - return dpt.asarray(param, sycl_queue=exec_q, usm_type=usm_type) + return dpt.asarray( + param, dtype=dtype, sycl_queue=exec_q, usm_type=usm_type + ) raise TypeError( f"a {name} value must be a scalar or 0-d supported array, " @@ -2919,8 +2921,10 @@ def interp(x, xp, fp, left=None, right=None, period=None): output = dpnp.empty(x.shape, dtype=out_dtype) idx = dpnp.searchsorted(xp, x, side="right") - left_usm = _validate_interp_param(left, "left", exec_q, usm_type) - right_usm = _validate_interp_param(right, "right", exec_q, usm_type) + left_usm = _validate_interp_param(left, "left", exec_q, usm_type, fp.dtype) + right_usm = _validate_interp_param( + right, "right", exec_q, usm_type, fp.dtype + ) _manager = dpu.SequentialOrderManager[exec_q] mem_ev, ht_ev = ufi._interpolate( diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index a88fcc85e42c..f125187d6bf4 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -1270,6 +1270,9 @@ def
test_errors(self): fp = dpnp.array([0.0, 1.0]) assert_raises(ValueError, dpnp.interp, x, xp, fp, period=0) + # period is not scalar or 0-dim + assert_raises(TypeError, dpnp.interp, x, xp, fp, period=[180]) + # period has a different SYCL queue q1 = dpctl.SyclQueue() q2 = dpctl.SyclQueue() @@ -1280,6 +1283,19 @@ def test_errors(self): period = dpnp.array([180], sycl_queue=q2) assert_raises(ValueError, dpnp.interp, x, xp, fp, period=period) + # left is not scalar or 0-dim + left = dpnp.array([1.0]) + assert_raises(ValueError, dpnp.interp, x, xp, fp, left=left) + + # left is 1-d array + left = dpnp.array([1.0]) + assert_raises(ValueError, dpnp.interp, x, xp, fp, left=left) + + # left has a different SYCL queue + left = dpnp.array(1.0, sycl_queue=q2) + if q1 != q2: + assert_raises(ValueError, dpnp.interp, x, xp, fp, left=left) + @pytest.mark.parametrize( "rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3] From cbe7e7a4bf5202394f6ab22d0ee3707d0375d4cd Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 15 Apr 2025 03:29:36 -0700 Subject: [PATCH 24/37] Add sycl_queue tests for interp --- dpnp/dpnp_iface_mathematical.py | 6 +++++- dpnp/tests/test_sycl_queue.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index c15876684922..a3b6c8d4e6c8 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -368,6 +368,8 @@ def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): raise ValueError( "input arrays and {name} must be on the same SYCL queue" ) + if dtype is not None: + param = param.astype(dtype) return param.get_array() if dpnp.isscalar(param): @@ -2919,7 +2921,9 @@ def interp(x, xp, fp, left=None, right=None, period=None): assert xp.flags.c_contiguous assert fp.flags.c_contiguous - output = dpnp.empty(x.shape, dtype=out_dtype) + output = dpnp.empty( + x.shape, dtype=out_dtype, sycl_queue=exec_q, 
usm_type=usm_type + ) idx = dpnp.searchsorted(xp, x, side="right") left_usm = _validate_interp_param(left, "left", exec_q, usm_type, fp.dtype) right_usm = _validate_interp_param( diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index b0112702e308..1f015d6ab2dd 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1453,6 +1453,24 @@ def test_choose(device): assert_sycl_queue_equal(result.sycl_queue, chc.sycl_queue) +@pytest.mark.parametrize("device", valid_dev, ids=dev_ids) +@pytest.mark.parametrize("left", [None, dpnp.array(-1.0)]) +@pytest.mark.parametrize("right", [None, dpnp.array(99.0)]) +@pytest.mark.parametrize("period", [None, dpnp.array(180.0)]) +def test_interp(device, left, right, period): + x = dpnp.linspace(0.1, 9.9, 20, device=device) + xp = dpnp.linspace(0.0, 10.0, 5, sycl_queue=x.sycl_queue) + fp = dpnp.array(xp * 2 + 1, sycl_queue=x.sycl_queue) + + l = None if left is None else dpnp.array(left, sycl_queue=x.sycl_queue) + r = None if right is None else dpnp.array(right, sycl_queue=x.sycl_queue) + p = None if period is None else dpnp.array(period, sycl_queue=x.sycl_queue) + + result = dpnp.interp(x, xp, fp, left=l, right=r, period=p) + + assert_sycl_queue_equal(result.sycl_queue, x.sycl_queue) + + @pytest.mark.parametrize("device", valid_dev, ids=dev_ids) class TestLinAlgebra: @pytest.mark.parametrize( From aa102bd5ffd7e40f7cdb802ee9d906d799613f8e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 15 Apr 2025 03:50:34 -0700 Subject: [PATCH 25/37] Add usm_type tests for interp() --- dpnp/dpnp_iface_mathematical.py | 10 ++++-- dpnp/tests/test_usm_type.py | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index a3b6c8d4e6c8..5c5eb7b05244 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2921,15 +2921,19 @@ def interp(x, xp, fp, 
left=None, right=None, period=None): assert xp.flags.c_contiguous assert fp.flags.c_contiguous - output = dpnp.empty( - x.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type - ) idx = dpnp.searchsorted(xp, x, side="right") left_usm = _validate_interp_param(left, "left", exec_q, usm_type, fp.dtype) right_usm = _validate_interp_param( right, "right", exec_q, usm_type, fp.dtype ) + usm_type, exec_q = get_usm_allocations( + [x, xp, fp, period, left_usm, right_usm] + ) + output = dpnp.empty( + x.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type + ) + _manager = dpu.SequentialOrderManager[exec_q] mem_ev, ht_ev = ufi._interpolate( x.get_array(), diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 1d512ce111a6..ad8b6ba9403f 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1268,6 +1268,65 @@ def test_choose(usm_type_x, usm_type_ind): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) +class TestInterp: + @pytest.mark.parametrize("usm_type_x", list_of_usm_types) + @pytest.mark.parametrize("usm_type_xp", list_of_usm_types) + @pytest.mark.parametrize("usm_type_fp", list_of_usm_types) + def test_basic(self, usm_type_x, usm_type_xp, usm_type_fp): + x = dpnp.linspace(0.1, 9.9, 20, usm_type=usm_type_x) + xp = dpnp.linspace(0.0, 10.0, 5, usm_type=usm_type_xp) + fp = dpnp.array(xp * 2 + 1, usm_type=usm_type_fp) + + result = dpnp.interp(x, xp, fp) + + assert x.usm_type == usm_type_x + assert xp.usm_type == usm_type_xp + assert fp.usm_type == usm_type_fp + assert result.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_xp, usm_type_fp] + ) + + @pytest.mark.parametrize("usm_type_x", list_of_usm_types) + @pytest.mark.parametrize("usm_type_left", list_of_usm_types) + @pytest.mark.parametrize("usm_type_right", list_of_usm_types) + def test_left_right(self, usm_type_x, usm_type_left, usm_type_right): + x = dpnp.linspace(-1.0, 11.0, 5, usm_type=usm_type_x) + xp = dpnp.linspace(0.0, 
10.0, 5, usm_type=usm_type_x) + fp = dpnp.array(xp * 2 + 1, usm_type=usm_type_x) + + left = dpnp.array(-100, usm_type=usm_type_left) + right = dpnp.array(100, usm_type=usm_type_right) + + result = dpnp.interp(x, xp, fp, left=left, right=right) + + assert left.usm_type == usm_type_left + assert right.usm_type == usm_type_right + assert result.usm_type == du.get_coerced_usm_type( + [ + x.usm_type, + xp.usm_type, + fp.usm_type, + left.usm_type, + right.usm_type, + ] + ) + + @pytest.mark.parametrize("usm_type_x", list_of_usm_types) + @pytest.mark.parametrize("usm_type_period", list_of_usm_types) + def test_period(self, usm_type_x, usm_type_period): + x = dpnp.linspace(0.1, 9.9, 20, usm_type=usm_type_x) + xp = dpnp.linspace(0.0, 10.0, 5, usm_type=usm_type_x) + fp = dpnp.array(xp * 2 + 1, usm_type=usm_type_x) + period = dpnp.array(10.0, usm_type=usm_type_period) + + result = dpnp.interp(x, xp, fp, period=period) + + assert period.usm_type == usm_type_period + assert result.usm_type == du.get_coerced_usm_type( + [x.usm_type, xp.usm_type, fp.usm_type, period.usm_type] + ) + + @pytest.mark.parametrize("usm_type", list_of_usm_types) class TestLinAlgebra: @pytest.mark.parametrize( From 82c657e1efa2c957a66257b1b56c57474d6735d6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 15 Apr 2025 05:33:50 -0700 Subject: [PATCH 26/37] Fix pre-commit remark --- dpnp/backend/kernels/elementwise_functions/interpolate.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index cde69266f1b5..76ad0946aa22 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -54,8 +54,7 @@ sycl::event interpolate_impl(sycl::queue &q, if (IsNan::isnan(res)) { res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; if (IsNan::isnan(res) && - (fp[x_idx] == fp[x_idx + 1])) - { + 
(fp[x_idx] == fp[x_idx + 1])) { res = fp[x_idx]; } } From b89f41ae07d407abfb4c655bf083e14d8efc5272 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 28 Apr 2025 01:53:16 -0700 Subject: [PATCH 27/37] Move value_type_of to ext/common.hpp --- dpnp/backend/extensions/common/ext/common.hpp | 12 ++++++++++++ .../ufunc/elementwise_functions/interpolate.cpp | 14 ++------------ .../ufunc/elementwise_functions/nan_to_num.cpp | 16 ++++------------ 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp index 9a45e21a4e7a..695a42fc5c6a 100644 --- a/dpnp/backend/extensions/common/ext/common.hpp +++ b/dpnp/backend/extensions/common/ext/common.hpp @@ -106,6 +106,18 @@ struct IsNan } }; +template +struct value_type_of +{ + using type = T; +}; + +template +struct value_type_of> +{ + using type = T; +}; + size_t get_max_local_size(const sycl::device &device); size_t get_max_local_size(const sycl::device &device, int cpu_local_size_limit, diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 784cef224548..d79a10595cd9 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -36,11 +36,13 @@ #include "kernels/elementwise_functions/interpolate.hpp" +#include "ext/common.hpp" #include "ext/validation_utils.hpp" namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +using ext::common::value_type_of; using ext::validation::array_names; using ext::validation::array_ptr; using ext::validation::common_checks; @@ -51,18 +53,6 @@ namespace dpnp::extensions::ufunc namespace impl { -template -struct value_type_of -{ - using type = T; -}; - -template -struct value_type_of> -{ - using type = T; -}; - template using value_type_of_t = typename value_type_of::type; 
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp index ec5dfd0a78b3..9880006331d5 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/nan_to_num.cpp @@ -52,9 +52,13 @@ #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" +#include "ext/common.hpp" + namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +using ext::common::value_type_of; + // declare pybind11 wrappers in py_internal namespace namespace dpnp::extensions::ufunc { @@ -62,18 +66,6 @@ namespace dpnp::extensions::ufunc namespace impl { -template -struct value_type_of -{ - using type = T; -}; - -template -struct value_type_of> -{ - using type = T; -}; - template using value_type_of_t = typename value_type_of::type; From 36ee45562e9bf37a8201738447c0bdc0ef6ee749 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 28 Apr 2025 03:34:24 -0700 Subject: [PATCH 28/37] Address remarks --- .../elementwise_functions/interpolate.cpp | 78 ++++++++++--------- .../elementwise_functions/interpolate.hpp | 32 ++++++-- dpnp/dpnp_iface_mathematical.py | 3 +- dpnp/tests/test_sycl_queue.py | 6 +- 4 files changed, 74 insertions(+), 45 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index d79a10595cd9..5ad3463a2169 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -31,8 +31,8 @@ #include // dpctl tensor headers -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" #include "kernels/elementwise_functions/interpolate.hpp" @@ -41,6 +41,7 @@ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace 
type_utils = dpctl::tensor::type_utils; using ext::common::value_type_of; using ext::validation::array_names; @@ -57,18 +58,18 @@ template using value_type_of_t = typename value_type_of::type; typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, - const void *, // x - const void *, // idx - const void *, // xp - const void *, // fp - const void *, // left - const void *, // right - void *, // out - std::size_t, // n - std::size_t, // xp_size + const void *, // x + const void *, // idx + const void *, // xp + const void *, // fp + const void *, // left + const void *, // right + void *, // out + const std::size_t, // n + const std::size_t, // xp_size const std::vector &); -template +template sycl::event interpolate_call(sycl::queue &exec_q, const void *vx, const void *vidx, @@ -77,15 +78,15 @@ sycl::event interpolate_call(sycl::queue &exec_q, const void *vleft, const void *vright, void *vout, - std::size_t n, - std::size_t xp_size, + const std::size_t n, + const std::size_t xp_size, const std::vector &depends) { - using dpctl::tensor::type_utils::is_complex_v; + using type_utils::is_complex_v; using TCoord = std::conditional_t, value_type_of_t, T>; const TCoord *x = static_cast(vx); - const std::int64_t *idx = static_cast(vidx); + const TIdx *idx = static_cast(vidx); const TCoord *xp = static_cast(vxp); const T *fp = static_cast(vfp); const T *left = static_cast(vleft); @@ -114,6 +115,7 @@ void common_interpolate_checks( auto array_types = td_ns::usm_ndarray_types(); int x_type_id = array_types.typenum_to_lookup_id(x.get_typenum()); + int idx_type_id = array_types.typenum_to_lookup_id(idx.get_typenum()); int xp_type_id = array_types.typenum_to_lookup_id(xp.get_typenum()); int fp_type_id = array_types.typenum_to_lookup_id(fp.get_typenum()); int out_type_id = array_types.typenum_to_lookup_id(out.get_typenum()); @@ -124,38 +126,41 @@ void common_interpolate_checks( if (fp_type_id != out_type_id) { throw py::value_error("fp and out must have the same dtype"); } + 
if (idx_type_id != static_cast(td_ns::typenum_t::INT64)) { + throw py::value_error("The type of idx must be int64"); + } - if (left) { - const auto &l = left.value(); - names.insert({&l, "left"}); - if (l.get_ndim() != 0) { + auto left_v = left ? &left.value() : nullptr; + if (left_v) { + names.insert({left_v, "left"}); + if (left_v->get_ndim() != 0) { throw py::value_error("left must be a zero-dimensional array"); } - int left_type_id = array_types.typenum_to_lookup_id(l.get_typenum()); + int left_type_id = + array_types.typenum_to_lookup_id(left_v->get_typenum()); if (left_type_id != fp_type_id) { throw py::value_error( "left must have the same dtype as fp and out"); } } - if (right) { - const auto &r = right.value(); - names.insert({&r, "right"}); - if (r.get_ndim() != 0) { + auto right_v = right ? &right.value() : nullptr; + if (right_v) { + names.insert({right_v, "right"}); + if (right_v->get_ndim() != 0) { throw py::value_error("right must be a zero-dimensional array"); } - int right_type_id = array_types.typenum_to_lookup_id(r.get_typenum()); + int right_type_id = + array_types.typenum_to_lookup_id(right_v->get_typenum()); if (right_type_id != fp_type_id) { throw py::value_error( "right must have the same dtype as fp and out"); } } - common_checks({&x, &xp, &fp, left ? &left.value() : nullptr, - right ? 
&right.value() : nullptr}, - {&out}, names); + common_checks({&x, &xp, &fp, left_v, right_v}, {&out}, names); if (x.get_ndim() != 1 || xp.get_ndim() != 1 || fp.get_ndim() != 1 || idx.get_ndim() != 1 || out.get_ndim() != 1) @@ -167,6 +172,10 @@ void common_interpolate_checks( throw py::value_error("xp and fp must have the same size"); } + if (xp.get_size() == 0) { + throw py::value_error("array of sample points is empty"); + } + if (x.get_size() != out.get_size() || x.get_size() != idx.get_size()) { throw py::value_error("x, idx, and out must have the same size"); } @@ -183,12 +192,12 @@ std::pair sycl::queue &exec_q, const std::vector &depends) { + common_interpolate_checks(x, idx, xp, fp, out, left, right); + if (x.get_size() == 0) { return {sycl::event(), sycl::event()}; } - common_interpolate_checks(x, idx, xp, fp, out, left, right); - int out_typenum = out.get_typenum(); auto array_types = td_ns::usm_ndarray_types(); @@ -215,13 +224,10 @@ std::pair args_ev = dpctl::utils::keep_args_alive( exec_q, {x, idx, xp, fp, out, left.value(), right.value()}, {ev}); } - else if (left) { - args_ev = dpctl::utils::keep_args_alive( - exec_q, {x, idx, xp, fp, out, left.value()}, {ev}); - } - else if (right) { + else if (left || right) { args_ev = dpctl::utils::keep_args_alive( - exec_q, {x, idx, xp, fp, out, right.value()}, {ev}); + exec_q, {x, idx, xp, fp, out, left ? left.value() : right.value()}, + {ev}); } else { args_ev = diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index 76ad0946aa22..c671a13312c7 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -1,21 +1,43 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + #pragma once #include #include #include "ext/common.hpp" -#include "utils/type_utils.hpp" - -namespace type_utils = dpctl::tensor::type_utils; using ext::common::IsNan; namespace dpnp::kernels::interpolate { -template +template sycl::event interpolate_impl(sycl::queue &q, const TCoord *x, - const std::int64_t *idx, + const TIdx *idx, const TCoord *xp, const TValue *fp, const TValue *left, diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 5c5eb7b05244..b77f0f0ce3a1 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -366,7 +366,8 @@ def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): ) if dpu.get_execution_queue([exec_q, param.sycl_queue]) is None: raise ValueError( - "input arrays and {name} must be on the same SYCL queue" + f"input arrays and {name} must be allocated " + "on the same SYCL queue" ) if dtype is not None: param = param.astype(dtype) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 1f015d6ab2dd..8f6476ea13e9 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1454,9 +1454,9 @@ def test_choose(device): @pytest.mark.parametrize("device", valid_dev, ids=dev_ids) -@pytest.mark.parametrize("left", [None, dpnp.array(-1.0)]) -@pytest.mark.parametrize("right", [None, dpnp.array(99.0)]) -@pytest.mark.parametrize("period", [None, dpnp.array(180.0)]) +@pytest.mark.parametrize("left", [None, -1.0]) +@pytest.mark.parametrize("right", [None, 99.0]) +@pytest.mark.parametrize("period", [None, 180.0]) def test_interp(device, left, right, period): x = dpnp.linspace(0.1, 9.9, 20, device=device) xp = dpnp.linspace(0.0, 10.0, 5, sycl_queue=x.sycl_queue) From 3b0eb603d02934918870a03ac809b701eb29400f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 28 Apr 2025 09:03:52 -0700 Subject: [PATCH 29/37] Address the rest remarks --- 
dpnp/backend/kernels/elementwise_functions/interpolate.hpp | 2 ++ dpnp/dpnp_iface_mathematical.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index c671a13312c7..1b46327bf545 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -47,6 +47,8 @@ sycl::event interpolate_impl(sycl::queue &q, const std::size_t xp_size, const std::vector &depends) { + // Selected over the work-group version + // due to simpler execution and slightly better performance. return q.submit([&](sycl::handler &h) { h.depends_on(depends); h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index b77f0f0ce3a1..76434981c71f 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2919,8 +2919,6 @@ def interp(x, xp, fp, left=None, right=None, period=None): fp = fp[asort_xp] xp = dpnp.concatenate((xp[-1:] - period, xp, xp[0:1] + period)) fp = dpnp.concatenate((fp[-1:], fp, fp[0:1])) - assert xp.flags.c_contiguous - assert fp.flags.c_contiguous idx = dpnp.searchsorted(xp, x, side="right") left_usm = _validate_interp_param(left, "left", exec_q, usm_type, fp.dtype) From 70611c2cb0854b75f73a58d7e6a87fcd5e3815ce Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 29 Apr 2025 01:43:27 -0700 Subject: [PATCH 30/37] helper files --- perf.py | 50 +++++++++++ repro | Bin 0 -> 100088 bytes repro.cpp | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++ test.py | 22 +++++ test_cpp | Bin 0 -> 101704 bytes test_cpp.cpp | 96 +++++++++++++++++++++ 6 files changed, 399 insertions(+) create mode 100644 perf.py create mode 100755 repro create mode 100644 repro.cpp create mode 100644 test.py create mode 100755 test_cpp create mode 100644 test_cpp.cpp diff --git 
a/perf.py b/perf.py new file mode 100644 index 000000000000..f863ad32406e --- /dev/null +++ b/perf.py @@ -0,0 +1,50 @@ +import numpy +import dpnp +import dpctl +from dpnp.dpnp_utils import get_usm_allocations + +import time +from IPython import get_ipython + +ipython = get_ipython() +if ipython is None: + from IPython.terminal.interactiveshell import TerminalInteractiveShell + ipython = TerminalInteractiveShell() + + +dtypes = ['f4', 'f8'] + +print(dpctl.select_default_device().name) + +for dtype in dtypes: + xp_size = 16_000_000 + fp_size = 2_000_000 + + xp_type = dtype + fp_type = dtype + + numpy.random.seed(81) + xp = numpy.sort(numpy.random.uniform(0, 1000, size=xp_size).astype(xp_type)) + numpy.random.seed(76) + fp = numpy.sort(numpy.random.uniform(-100, 100, size=xp_size).astype(fp_type)) + + numpy.random.seed(70) + x = numpy.random.uniform(xp[0], xp[-1], size=fp_size).astype(xp_type) + + x_dp = dpnp.array(x) + xp_dp = dpnp.array(xp) + fp_dp = dpnp.array(fp) + + _, exec_q = get_usm_allocations([x_dp, xp_dp, fp_dp]) + + _ = dpnp.interp(x_dp, xp_dp, fp_dp) + + print(f"xp_type : {xp_type}") + print(f"fp_type : {fp_type}") + print(f"Numpy: ") + ipython.run_line_magic('timeit', '-n 10 -r 7 numpy.interp(x,xp,fp)') + + time.sleep(1) + + print(f"DPNP: ") + ipython.run_line_magic('timeit', '-n 10 -r 7 dpnp.interp(x_dp,xp_dp,fp_dp); exec_q.wait()') diff --git a/repro b/repro new file mode 100755 index 0000000000000000000000000000000000000000..259ce265b91133c4dc26c1c3a49d8ee7f17cbf31 GIT binary patch literal 100088 zcmeFa3tUuH_dh%+C|(bi74=k7QAw78m(t1t1(~rC@RB%Mq9J%m!AAzQo(vSc3=>7x z)BR%Cr>oh`%%>Faf@vjYH_H+=V7Q}B}fzu&#r3(>ZD9GeFT9|G&Dej6`CDNhE)aSlNomf*JQwLM5=`z!$rdH4j z#?PY{D=`oKt_2ctcxb3*O)Wv0FPsrYW1G+m6=yPesHb#N=4(7+C8}w}KQ!y1rZCMj zUkIZS3~)rmD&nDsU7Gb!-+ZdPF!3NPPAdxS+(WY->YI<)RGM|Z%8@#shnm{xd{yC^ zdh!u2=36>g=UaM*&gY?Ff~SXi@T8CY^$|S#w2TEuUegFNI3iSNGBwXPQu2)yeGm2M zd#DHdVY}_)|4w-+k}s@hkn$v~XIvAyM~h7!>M3ss@)4I)J*3%9L#4bx?GW8sTl7#V 
z-gNDhNq1h?>)I*fubDC_D|gm4v-(_j&2_!H=VW)+4NkvU2K!WE(ru;|`KD6JJo!E8 zG)G70XI6}xc$U?ka(&^bt!?L&q%Au1?1sIqmVR~l)b<0uSvP9y z-IlwSt?!*&6P56__42SUXZ7s++=gB)@7{D)Y+j#_9*r!l%=lvJ#97HTf6g2?@tnuo z4`_IE!}0B9vu=>0P9Tw}@L4BFO{OPNNq_Pn0OC(hY$N>GDGZ>`@x`B=0(2UG@|*zj z>jL=SD}a70G_*ha98Ubnm!re@lP?WmKPv!k?a`V1*=Z91hd}}Cqq=@^&Iq7?R{;C# z1Nd`x06U8U$lnWqrxW@-XPO`91}OL50632gP;OCxdQAzSe?b8G{s8^wg#iBC8=$@; z1K7VVfSp?c;7}64{=@)&E(;*H1iYpu?&j;7laV^LCLAIlYa>{KnUkPAu{bS^Q4o5tGWsBVf?p~>`>Dz~Glq>q-I zEAl?TjrwNM_gt555c?(hT7T(zIzwmp*$MiKML$~XjDwv{ri)Cm>AGCcbfiqmP3f!c zc*=bY`G{x!C0dTCI!XhSI|8ZHU))T;8unRk{&*=@n=!rR!RPTVU(gqT@?41C)E2$fw<=?W`3%zYyF^cQ(cC z38`0@;29)^3>G`JTeO{Tu9Aebv9fYA-3dst($dG-(fSY#ZL>x^i4~fmOUv8t=pu5Mh&;1 z&AmuzNSZeJ0Nv!n?7h-6rcYNI*#t%EJ~bg>=&R?un7)&8 za30d0HZ?vYCmsDDAtnZ$ICr`}TM`m_@ubm?c>U=8&)kgMjO(UmOiiD5cS6dHlv`&^ z9eS&8aXr(s$7iHX$wqI+Nq`>iQ^6k5IN>H_WsFZ7Hw7BvM8ecY6Lcy;8@xU}8$Dx| zUD%kO)23(Rti|~{1>=WEJ(WD6)HH&M2GC;-vfN2e8UfW6iHRYeo`Tfi(yXcjdNh_K z&SCiRj4PIbGg$YSUf#~CokRCipfqMPNv4lF^w8v#B4ZYMoHC-v zOq`dqCfR9m3N&CYBa0Q9k(1?PAm{F!baXXn;kq;3l;hFT#p3jzl97gCJ7e7VjPa_J zap`xljI>OgIVPMP_KYcr=h)NGoFVm@!N@ldgpn z>6=Hiff-;-@79fX^w*idPW8qnq@xwd#Zb;uk4zsj=~^jqz<{)#-J?y3R!jc@X+664 z?9~|U;f?wTyEn#rbnoreh!OF~kucY*dyg2MB5YsNfRQ6H-lfHK@7cYlFB8+#SJ5}c zHxDK*EwJVgYzo3EMQf}og(3_E*9O0~;6w07zLlw+3R|hLy$U-(+6wuD6vr?KTFeoo z@-WuQWGH!%(qjrs^5uNJV5!t(n?`Vn;zVoC>9ZaZAH{tE)Pr}MtsHwN`xfhUcWr`F25tYBCX)t&mLBM2_ z_(T4m^^al3*ugYX^c{Io;M9lq>v!^ZsmWf{nSt+oKjC7Gf$9{%)lo zYtrp1XQ{zK< zqL17&Z>QYsBlp}NQSQ4)6+TwmK~tm68;=MdIc`n8M^_&?ZF!DpA3591bM*0%vkcD> z>m!#YS4n0cIXbBKNb!-QV|kB}K5}$G?=jX#-rg%UnI`(kae{b{X+H9fUa846%SX<0 z&vWGa$bHve7W&A2@7qg!?8NwyE6GzAGzGhsa#8a}d6?8NwVlyGdN6xh+&oRHQ>1Xd-9x8xhoa_C3#L|-KC1(K%P@8_fo}|ljoGyy;$)p$a9M2 zE>ZluX5B+n_5yGrq+$a8Asu2lR`@|+U6 zOBFweJg467rHb!Qo>L?DV#VJ;o>L-siQ;>Z=Tyj@ulTFTa|-01rua+AbL!(BtN07Z zbIRjRQT$otIi+{UD!wgwPI27Pif=)lQy6!I;*Z=6o>LchnBspY&nb)Br1)RSbE@L5 z{gd_INj`#nmEylA&KQt@As=aj@^(w>dJhBXTWKU#8vXY>JKVvfexp@VzmZJZU<}-B3^;CfY&MtefCn8$sSi3ewj0Kh?|=HlR3xO_9yD?)B$0*17j 
zbIry5Rs;o^TnWb-8aNh*vdKGe+Vh+W88B^-| zYKO_B_?6rpqC{mLk*K`m;awF+8ybu*`A{^mzsCG#acmxAcI-44{9KzdJRznu=2LS; zMeP+B?$6oMH^}ryW_P1YA=T0A`aN+Yd?*fCMAEn3ehymUJ}A-MHVsu`7w1O4=ucAm zz3(K60+6-lvc0#M%j#R2gVvchAGddgfebMa+EA5gbcr{aJo)Kge!rLdp{lvvZZjA3 zy8xQNbho{|+1YR14Zx}|uyT#Z3A-+2HuQq-;Cbnr-#rtF;?Heysc|FXZi^ev({q-w zszn~Uv{6C2Jc?tw}`i`O4S zf+LsqidBMe({(x$wwMuQ8ObbWqFQW?jBvF)f);hgM}`rn4rWKS*>QXYFN?U^I&Y6m zG0*$KeyiCzAk_R3DyY^d%zfAAUTCfupbK-hG&{C8q=cCz5?*s*em^Tf#V~R`u7Abm z2FIiFH2hG_uX5PqWz zn4Rq-*9D_L0A%3NU=MLEsbeGeLgj`>hAL!T?NHy9cN1PVD{Q$A10l2Xrbuhw26O~8 zN0_-`o$I|rl%mc4UPlN&~mHIvP|1>qvObWjK%X`%UVjYBn5GMAeAN zx{q^12hwRfg|i^(rgvz^)BW7`|i77R@hP?Z;3G<3Lqf>A&g81Mtb3ti@xgYg%-CBKez2_>~QTlpaC9EpwVpzuzl1C;O1Uwt_TclO%yJIg(wr#qkfZmD}R?^ z4(Q|W3b29w<@Eor{XHW7-X#9Mgzol#$KQ{7DSxkM>hD$JubS(|eCn;Ax%lcf2jYyd z_<6?;7-1cXcc5C`%W{X`GgL_z93ywbJ$QH6+%E{D zQ0|hN$-eDvZdh-2tgQ()7knPHiVp`?_UBm4`!m-x)LqHUj=iqSerLn2U3nXmc_rjf zoutum{N!4*zxgoU18I|1N8SEldD;3F{kq=%V2hbO^u=^MP4EuX)sZFe0t*XsX(5|Q zycwnSS{CW|gy!vzxBW2WsWU5`6U>a7>Il2({oqk9E zdBM1lyI(*t-m@xEoe1;ZKmv`QjZ267Suo8Neien^F?o0Y=v6Sv610I8oVsF&`I6~x zfzbxM>4R|B#q@WX-VaQ(^zS8|J4D@|lfhMSEU$_KPH%eDIltdEJ zm{LriXN``#J?@UUF>z^ktoD>XKzqNI10$v}omu*FFwN4J)a&#=aCvY)52i_a?qcTs z4rw)ynjNb>GXtYAA4t1O_BAxr+|8Dsfu4%=nkfj}e;|#NcXcY0|4AaQ-)7fas83Bh zn&_!bsHvK3oyxl1wGt&L6U}EAx9vp&XLYW-V8J~H`Zc&sU0rh_158I(Gj*ct4!F*k z*~Kdzrfsgfs5+cl$qf22=vH%nO{79UC-Y%nXHvF)B0%bKduDMFC-+?djdVaXe zKEUey7q1As*d&Hp`?~BK^ZRvOO3jZ8OBO5YyT5F?>_>}?MF9^4bSiQ&ui&ohsM6!X>`-h zn!693ZM1tbu3U|U{{8ZTPO-cr7|H){l=o2gQ>!0$W;d4S?H{;(3qv2#WBR{1tae4; za6OHV#o7ETv^ z!Hp*Seh!arsMS={7A)khtoe*%2po=6#cX#ShJ)VuLBWw0_VWvl1lc>z`!e@jb4BZV zz!+1C4A(PswlX_fBjVL({}`smjxJVb9vnTIdpRBLgELqBgnMym?g8)uWXYGdqp~EKF^3^QHxf(svjRaE(`sUqN264GX+Pk8ZTiShjZteEvxwYDt z=ay|RubxLZLq0)e;jdB1g?g+BdFSt{ea((77RMnA&bNzDS*)Wy0Ck*xR>vW;<9pR& z$O8;wO()i$xB1QLf4xuLxKcjSjmsdW*YW7T&X9MmXj;G6$9?=YSCsM@~Na75~D z!_0H`@Rh%KBkJSG2pxGpGE7I_jN~h4NO~!<76T6=Pe)ei$Rm-JI&yzxsg4vzE=7c9 z7AyLGWQn40M&>K}Qsgv6pN<@>=p&ISirya?t7u_lG^k^(*|BSKSR7Upx5pgfnn}$7 
z_>W;WLbKbzP7bn}9pAZ1ahHN7p2s_T+%dm^HI)k;w?~F!O{lYDcw`uc)eCW>))mPd zQLmbFZ)A94MpmvdkN4`}!X{;9Sd4kCHP@N@8;yB~alw`a^*G0uagOzIhsuNOXPX^W zlUvwaRw=UzTA2y+zPwu{w6wW;N0ETs0=uw&;wj2{^Q_QER*Cxj|agMF=j`ekwnXiQggfN|OuFX{}376<%cC^zDoM&_8NJ?u-Ia{Y(WOLah}sR4UvG2$g8OBn zaSzgC4^>FUn{BSoC1D&B3L7-~y=|@!L~eoHSWz9SEp@lKo|c4Yop7#B=xK8mNWytK z;e4HNqs=v05{|Q1uBiTpPPobD8X^h1b;5-@p`XpwM-o2M2^Z;vSevVhB)q2*e(9_e zdZGT3@T5))C&mxDX>PN*E|7#J%z-k_(PfOZxlEGsh)&rlP}|V{Sddcf zFDcR)N1^_bG8QR^R=3c_kG8qq7dal3s8Y^0=H{Vgd6NCZn7a{CbMftw(JM@-sTx{b z591A3Jd7CrmB}R1pInZnXGmfZkxTtUOt z_O=PQnFujE*46wns5tmKb3-W?>f2cwHpCURusXKXJel9G7s}*Qzn(Zd%t0IZ(4!8o zAb)3e95xpmzTN(%^{)Nl$xeGKu72C25@I-Y8ys}Ze)hnk$QuV2X9p$T5P9=p$3A;? zO*>1_p#k~#;)Q9H+E7-r5*AuaFEKkRU9<7r7?TT2!Df^DUR+F!uECK;RLEm^Ovp7z zJtiAnWsL4BV|G^=!@J6u-c`oJeLv}snrtCO zN&-@vUpsK#U1StE)T5KC$&of!p(uw#+5JzE2iaUxM9xR|%P~sJ03^^P*j%@Y!gegI z99kWWl?glxWE%{$xvrJ8)kt$s6giBZC-OHSUmht&;Zi5v5+iJ`gIG@^5DsV{;Vh$| zzgh*Vnw)BLeJ6=iX!a_R!)&R@heM8WGN{r~UiW2PId^O4;)bXS(*)PkqQgh`#)@BX z?fo5{?NE6uV_r?8gEK^{EwtF)7aDU1%jhAac_)njqO=rF|Cl@z&3U1oDFl9VeXMMm38Y_3_7(q2;J1i94a8Y3ycqiDJ<4R(Re z6)!2@Ns7#5uCTeTl9ZK5NtpMkvG8Jz(heE;yVzWs!X%Yroy>HuvbjE%gcO}1*P|$#>qSYpUYn98y4vO{l7x#@j*3`)3Ax7R z`llp>=o~UNim|yyO2QtjRpOc_m(N=;|B-|(NI>V3v-bv@>k3I&p%X%lg8SqY>uYnh zk;G?^=+1;3DBX8H*A?qq*gN5|=9&m|-DdM8?bqPSP~+zF!cfj%FUNgKt1i5aZ;uD1 z9S&TKXhpfYH*@q4H9Ok6EwJdhU30|Y`63?4IfC3hD8}i~AM-!lyb<6_AVP!glr~Am z{7+I^>y%wGg-XHvPf~t^tEi<6I0J01SV{OwC!A{(yo4UalHzTy%OvqbB)abxd6>=B zPUO$2Ow}^7UTbsxj{7RQHeV;S)m=K;=GrO=Q;`7pTNs7o#oSP|za*xx7^}#`ZLXI@ z-bZEfOrQqYTuw>o!i2_ox)tp&DIq$GH1lAzza;FzYHj1qz!0>*By7V0p(kep;B&hY2mj734J*>HBl-7F)`A8I6bF};|t#yy)9#mQ{ zBW$a+CTaO|T5GK4<|wVl5Vq4=$y)x9)=Jb|j?!`<rLoiV$6mS}V1Dg4X&#a~7qQh7cnXwZ8il#1UfCT3=}HMx`|bA}l;F;|@t^U99CHTC20>s+HEC2-PW7 z9AeV)I!KV`(0b+9R;9HMA#PkK->Kzpt+iEi8Y4n=1Mg8hRX9ALaaNW?jIt?-I~a4*Bb9= z{mGhp*sFh$)*7MZFKVr1&COF z<61dVbN7I&2%+*NTI(DwFVb3Jnwz4u<|4dQYXxcfy;|#71-x;Dj8R&4gqLZp-?ThS zYwglpiqgtN$Vn(Fu}RCvYOS@J>#wv%BD`E{y`$wrwbq-O>!q{?A-qCsJ)`CETI+Gm 
zU7@sYLU^Uta%lN=T5G=M!j)Ecgk81P-CEvNYt7PJTcs6&FiLA>Y54_OYpUjsT|iXN zg6t}-m9FLOwN{4a_A0Gl$hv8*FTc)%=LwK#$>Zs-Gv{pOKy{fcUB8=8r$JeP| zuv}{$)!dUxYbnAQt+h|fU)Nf{XzqTc^#Z~kT5F4zKdrSkYVJOz^$5bATI(Y%e?V)! zt+{DRYd*qWTI(q-&(~THYwk{^H49;Htu;r>r)#a5nj5aPCL_E~Yo%-Xc&#-?a|4yu z?Fg^eTFF{ILTgzx*GFk3A?%~I`f9mZYu&85tCUtOLcZsQUT}k!-=MXw*W86ls|UhZ zt<_D-uhv>uXs)BuibNQvwa(M>i?voK&6$+exd{7ftspHA(^`M7RjvB_`Rvi5kPXmU z`?S1;)^cm^XQg!nVZ7Gbs^xz`g1UU8xo?!#9)t;6YlD{mqP13QZoSg_3E@Dk^{$qG zr?uYH+zO>ti7-)XJ)`9twASOAdsAtZQ>L{XTK=)tD%9MwO6z@u7Oj=5zIi5GHA@18Y>vP1IWZG&e|TjYXKOwRULvD6O?!b2llip$Jp7*4J8Y z)moc1*IjAFBOIc&R%`jqTI&gu}GfOIm)Z)_O*B z!Ak1_gu}H~v6gq%S_?IID4cz@J!E_UhJJUqmbcPcGd1T@TE`KN&{}tCc|9b^GhTDw zE3I0Dsak88mj9}?ESmdFY3)QfQfu|r@@-nHo90$3t*;Q;wAOiA{)N^$S9431);ffv zv{nZ#FV$MDHTQzjT8?nE)~a8vTKiqCbx3oMD6Q8K-mbN(wfrTmwOe!ZmDbY;@6cM` zYx$#EYqRENDXj+(j?r2xwS1x0TCTatN-G~>n$~(%%kR-zFKF&|r8OPlSgo~4%cp29 zr{*b#7DF~g`(9q(z{C;^@NSyrVNMk^m z&XwY*b*+SKUg@mPd?BM1?%g-Vd}%gbEHf6BJ1T9fhmJ_Wb9%>m*JE0()8wErjxzI> zU&>sq%x!v=tL;XiV@5T3J~)go6X2$=qWp2ZS7NCseHFw!=P>WlW?pP9?yvw;^}*fR ze|mFV!w&9F@Y%nJ)Jd(_UPsLZySmt~FxUNnN7xbOsI3;q5lhhb7RLcg&=1JGRb?(Q z2W>Y8{UpYX`v>E8Z?F?v6*gM%hJrO}qq*-^qu?<>Df?;G9?$NG--rkVV`33xjzzE} z9~z*`7Hhh)l=Qyr`BkKU<{9{RHLFz3Dpj*e)l#KusZzC6sajX5c$KO|@iI?cTEea3+ff%|Nxl0eEXeYC zbbL|A_=Serm#UsGXU01_X2v@V_b{bk^6>FRNiE_RCbw|k2)R%Fp-%0~m3BPV<`tjd zy!BXuW6&{o2c_+!Z*_LGLjNGwPx(As^MjqY)(>_Js(1egd6WLNwmz)Y+WJnd-CP6) z{vLi!*{rYuwFo=)uXX=NrRAd`@o-}kIv$>Smog zSbHpNKHs}rdn{^!reX7Ah1WD}{#fBP4I7vb-U+zLRJ{8}#C-7a;O)LlX`G0EyQcbr|3Tc^YWoWRc&D}Fv3Q5oIOhHV zDL(cE|Fs@{&0C>gZ-sun`;Dgh9=zQTs~leZA&>7^isE>oQS|j^BPsUkH0?l8ubUpGA9C`oCnS#0;ZGb4;;&!2z+vWgq+^< z_muO`XLVt0fu=t8CjDQrH-E9%*VM;;oBvnr&o7noH1#R(2`s_*;#1hsq)v!Vg@%rV z6?Tm9NOS~}SdTzW` zE6QV<(6Iq%tjMGPCJmE(rvD-8@!M062=T+?S1m?)9#sKosmPc9OV|arqXCW&BGBZG(u>Y(Asu7y($1L z75UP?NlPSO{{Il|+(y@LtdxJI(Fm!3tknNZqgPA)!=(Oa8to(XuWBpj`58tBN&Oc~ zz0WjyyVO5L>VKxuEUAC(8A|h|-rn`g`k8!@3riz~YCzzYE(<<$Qc=E`Yx!7~Umhff z)mZk0)p&coxw!Xy%of&^9nLB<&pDEhd^5wX4y?<6Ywp(To0}Vc@mYU^vJJQTR>xLz 
z!HzEW%gl9%lwo~;gUYc9Ilf>HPx%&XE}wIROU}mqe5cjo#5amInakY4=BQ2PzDIJ# ztl2qxWa*l+*?BD)42V!+tP00k=B(DM)xVM`S-uJvtFTmsRZ_t074DzlJC}+?0n-tU$%T8%E~e zi*G@ii(`Xw52}5W*uOaMOM4W)2i$jq{nB`2S+Egb)V^Z^zFE{Zq0jnU^+NitIAd8^ zTvTa13~@aKdmD9zHn9tPh4Pj4-<90)JHB~4xkXw`sr%sb`t>-q6Y_V*@t6h(t3(K; zmQ<<~-shXW_}U2`NLd`;n6b~yac^W-*8tpk^9xO3>ssP{%F+hDkH;_Ut}6_V;j=5g zgo(H0`ptTRUj%U3p$_d}yr{P@6ZarihdnafjL(il@Y|@-{3>l&aa@o!z1EywroQ|% zx#b;$9o2&!yJHWn4YKD9j@oB+?7}Ai?z=eNadgfu*vrG`maOXQEiS(HZw}gpEt%2F zO(;G-s@|MliEmN{;}f#3woqolKf|xfnb91C_us#>Mpaq+)*25y4Y9*rup`u%_Yk55 zyN_EO+jYUgx$RLr-ks#9jB7eu9WHC%`kZuY@x&l}AsTG%zIaD{e3T2`_5~P5aS{~y z{kg96jGv=*U5%xW$-#H<6~!1NqG?gt(7612JDBZI>JJ#p6jxTB( zzp%|}Yw<<*0$_+k^Uk#fm05$zlbxMcCl_CM0d~9YQs2X@D{BoOa<71H3U%kyF9mNr z_#zfOPRs(WUs-L(rutuRO#(ipkEl@s2hS z;{%mt_>~8{*`d9KOTkcR2?hMhTe>fW0?TEYNN9ls{K{Ln+emo}&$r;Km-s4XL%I7P z*Sox{ADJa?@I0b4X-F12P3VI{UlCd%v`Xk7*nG@`|1*upyP=vsCgVz>%nK&S^P-{h zoRlxJ^4KH!DZ4{BLhOW=j6?l0Q-M_mKQuB>!iU|38wy zNb+Y({@W%0Et3B#$$y^YUnBXSll*ff{{+cDT=HKp`7f3HhUEWP@;@f|?~(jzlHV%% zua*24NdDH6|9#2-pyZz}`BNo-yyU-9@}DL78-%_o`4>q3$&x==^4}!+yGZ`FlCMta zOOiiN^4}@>6D5By$$y^YZz1{i34L1f&y@V5B>ydv|7yu^Nd6;2t0n&iD1{ArTEzvRDC@^_YebwYPa z{!NnqUdf*-`EQW?T_k^d$+u7F_maOt^4lf9Me<)Q`8!Ge<3e{x{?8=;N0L8N^4}== zFP8i*C7&kFhG}+IJFFGrJVJBQXcvT9*1tbVbygRmc z_FP?fWzkod@)k(sbjm1v5uZ2URZm@f)FlgQ!S4GZ#&a>NvnU0Z3ai7Ri1SH4{INQo zP{dip=Ojo|#JOmq;vQ1OS;R{a(iCwnnx)eeaSqD2IxP8C=g@^#$B>0q=b#esCEyP&mKwbg)CdfBI{w?I+ zLcSgH?T}-NYN>`C(^uO*$m<}li!U0~5WmpUz$$Xa2^Qnp4DCl&LvI zoJCohQ^Z*`LvxBai|*5$BF;hc;lg~l@&M=qppSw+3i>SQv!Jhlz5==gbP4D(&}E?b z60c&mP6>wgN|daWI4`C4%QAS z;w-vNbBZ{N?$Def&Y}#>DdH@esyRiRMY)<[_3bBZ_z%|oH{P_)i;__d!1f{TOr&=o-+^ zKtBWh3iK<`ZJ^sge+K;-bPwnr&_6)`Kwa^&jpZ2I^H%MSBF>^=np4DCG+J|tIE&IX zr--xYF3l<8EXvWGBF>_JX-*O6paK+GfTEqCPSA%z9|nC2^eNC6L0<%Y1N05h_dwqR z{V(W$LGcY@3p^dP5p*Lco-bRrfc^mb1L!W$U7$6fHJ}GT51@)i*_^26QBP|Q(rzi@ zM4D<9MVv*vmp~gS;w-vTbBZ{NCTmU+XVG-cDdH@;M{|le2hBy%bKyb}Xc6cl&_$q6 zf<6iQ0_Y2%uYb>`a9_F zsOk}P-=W7*S412`)Ol*rjw#|S8lpKxoJBUxDdH>|r#VHOMUynAh_mRQnp4DC#QQOT 
zq=<9S927nWPB=gvpbvpQ1o{N%6QIw7J`egD=xd<(cN3O(KtBTg2(%2e4D?gbPeC_> zZU)^7x)pQ>=nhafs2g-Y=zdi8Fe{65brc&HjAIDSS6c%=F&xi-5$668{|VZdw+k0| zTywFb0-phziL1NS*`azfUa0H4-gt0b2ouXyVqp|Ct;Of`fTjf>#x2-o95Aw5WEy$A zsE1jjzQ^@B443h+Oe4E(va|j7$;F*J;eL51@0V*~i3e|z)xpUD4&FQSSS-}@+%{I$ z&r5_>35{;{eoCeA5u7yOm?pGTXqc1>$KCLMu%iqwqy}5cb_834KD7pI7#y{pf@J4~ z8{qEQ+Fjs-I*ejP`bU_Hheix8j;+cxa<&FRvIUW&My64-73%{LV7{kxJtFm#mIILU z@rw&H7h57`f9M_!Pcln@3v|6U3(A>uE6~&SI?^6(QKD>7;$y3zEP}`|iZQR3I$KmNjAE04Xl z9($B1dzARKw-G%lP<>l_?6vXOqeR)G#IL=7{e`_yk3B?m`%dzARK_tan5!%3>`weZ-ZMA@UnpS_wS zHU9b^4={jedl*u*f0QVDl=!oUfvs{6t~Wv7dsZFWmwN`@#s#?&3`VgS$zm~*l^Wr5 z^8q?W@BXC-232BYiOn>s<4O@&Dj^zLg=Pcs)U z%GXIlAjq>p;GkgkW2}D->#tX7{Z*dVA6(TsZB$paJbO)xOx)A8#l1-=;jlW|;1(qm zw=7|}e1_w;B@CBP{1d7ASg1rm-WBN)katCTG-Q2vMx+7hb>G@{wo%E0^(&W#3qgw>JZfIHQ;kM6lp~KB{01s<_qJesxmu zh39kSzMLB%^I5e+0E@><7;SkR6dED(i6o)3guX14-!SK~LG%|3-7oY)N$3Ch;$av1 zywFWT*NOhqLVpt)F6sQYVLYY@eMTrhQO%=V^dA@M7TQ_T`wN{S^a-IALe;-|g&oiS zEPkz=N0_AFEL8nRWyt?6{A!^qMBg(%+aY{=Nxwnpc%csoT_u$NW{k)ELVpw*D(Tk= z9V@h0C}%4?-WC0YLbnRV(;#*95PG{%htLm&diLcMO1`g!nk2ou(2+vt34KrK>!P18 zbhFT7La!3N;X>yKeOsv6k@_KitMJ2reN5ei-?#W}#}EIpGZp`GC$(;2YC}2Z-;UJf z_?6;^{m-d*WR;4~d#6_6=fbZRzk02<4AGVN;h$oret{nzUZ&#zZ>Hj3yrklPKBOMX zbfg~Fsvm&;1i!WTeU9JP`2C3AZv1}3?;uqF^p^iVBp>6q20#24mDI2B+lJrI`0c^( z4@c@TZTCHJ|HTi_-BLH=w*|i+@Y{u74Sok4sYktL--TcWe&zUW!0$`^zQ=DTe!t@P zyCd~Tqs#9=@)3S`TAGUgm6M83+oW#AZwG$(f4Hgp9jS*Ky)Nk21~)M+jQe?ilW8`x z$}uAF<7eEsi(T|#h*}(n@ifjD9JRqxu+y*xeLFa+5~IZy&uGzj`OGX?q`Q^r8KLhA zeOu^%g)SGmMQEi^&p7;u@GlE}L8w#cLZPdLmJ0QhzmO3Af$G-Rm=~tjsmLurwC09| zs?qR|Nsx5S!qh*BTutP{)T3yenhO`E?uP(~>b&h2DzJ_)4P#}8*+9wj2(x%A$4@nI zmL(OpZ-aqz2M7KITxlicswyd0P(c%UeMqE6BCiUGydWg~8s!5tA zl&h8G^E6F-kgCryc$>j02AddcXRr^|RA#tx2*Cob5|VO7kd&)`RD6KJvkaCnSjpfE z20t;_D_sil1zesZLTT` z7AcpoKojq0@HB%r8GOv(GX~oj?9t~Cq6==7j9gG6&&4BBE)-GK$>1plZ!q{TgN+P+ zU{K>d#lS7#Dh?@EY)H8}L&}vH(8MAJPcnF&!3qW&7<|v**Typt>;kTmka9(Yl&c@4 zdghUr=m3-W1cTQYe8k{W2Dr7&OLR9mK_OXSmi%0_C(i|XQZB|b$3g~=GkBH3at4?T 
z=Ouo}fTJHrKExOBO%GD8l#_B*oRllzpot3@JjUQZ3_fH~!Qfj49C2l|1-oFNWagSQ zd9FW`a;=%_^BFwK;1veT7_4LP4Fit0o{<*(f&|IU#b5GV=q2T%F7@Xz_&0->8GOKC zErYKaa6tAMj3HVOF9x{4N}h|Uq+B>c$@FIiv81N)a#4<=;B8T(i))Q4z6oEYGm9C@z4k#en#S>`Z!h{5v=-eFM2U^4?=9GYGjtT%?> zKi``2(rViKj_>;JBoBAdd5K{RA{aztOw7j!%y=w=6b54%;0|OJlV-2Qh2>`488w?~ zS_%%E?61|d_Wb|AIWBFe8bwsIh-wy5ofypkwySBonzpN9drk$lrS#e5nBJq5?;vVc zig&(`YDKL?wNh(B)cuCpaUQC!?r%WD@KZN5px8f?+7-WO{BReM7n`>Qw)gzOo0pqf z`_7hq@ln-zy(ZN2{*c*NeI9;N0wQ7L!WnR(DwdJ145AsRa%OKtIfZ`8;r&_FVYDZX z$RnEGCG-QKEskpWtwJ9Y`m@kp$8`FALN^LM|4%KSEc8R6t&eN@NTEMS`CX*EOrcAK z{wZ{zl($IecA?iwd3Ot4C$zJamm&0Bp)I6bevXaDH&XruQqDM`{5PpQ>V?KhdBsA% z7s~&4!NV?;|5<{EA?1w~`j*f?g(gY)o2C47q@3G@z9RI1(3_;Zg+jj;dbyOBE%ald z{8t$~MhksaXuVL2l>eEOA139b3VmMaUZMOy4Lk~jRtk-f@+J%YP-uvhGeYQ#LJtUy zm+~v5{B}~#P@zu?bqnnw<=rQAqtJgyc^N|A6>5@lh6sID=w6|>NcrVbek&x7;w<&71}e~8QDs8Fkv_k>Wl&>N)uPo(@HDaR~ykp=Cll zNjam1zAE&P&_pTk5urZ|?Jeamm+}t_jg|6>gnlbDLdu&W^dq5dq?}^N@m4BC@gnF1@`oxww5h-sHf%cLyU($Ddn^(yhhu=Yitp9RE>k zhhdJ{gmDqtB>P(TB!(X0TE|K@^UF$c)*Xf9X>nrF~|d1)K} zbUe>AGYL?{PQS|NdWF$7Da`0PBEsl8CfbO)(uhh9Gon%>jHt9|Sc!!N{3>EKQYf{} zRCtr#`C$v6XW{8vJ6>Oj2b{vfWxi2>`Hu;OH?a&Snwp;TYd+MBu9>k$*Q^wy>x{8R z*ZVN*S*FBM6Jm|1>=Ywv=2#;t&oddyc@18d%3|%kJy-@^1WjTsuf#8z)y|Bd2=1># zTHXY3dCA~>DiJ@WStS~qM$K>VA=Xk%x%3W44TW;LM~CoHSp;dwls5sH^0JXBZzeM3HCC;-dDXD_wy|m+OW6Zqsj+Hm8YxvP zI1(}aVk9!3QVK);5fLv?{Rs9mcDaiZ?rK(jvJkk$p{jj8*lRtBtM$ zYtdz@&}AyoWlD{x#9H*3D)gC3^qEq64I}vNi$zI4Om0?^em;!;vj06#f8inr3U6u` zPBb&@?J-NyV-};wl%U7tn~P`U8&MOMqPr|ccPT-4fh;dS=Ph4Adnx-KM5~l>Rui%8 zft9Q|9N!yDIk>!JaLWA)o4YUbZGNa1Lks%{MjBJl20Vf8cVRiSYLWLm#$_SFd z?TbKrb(`e3y#^OMFbJ9O`UKj(VR~!u9LaIMm+GO-2%parh6tl)80Z;YaBg3VmbwL{ z9?UduF!ltYtzsG5g-13w7~g}>Q~YM|*<3=w|7wm(tDN2u4T=txT`RW1@sS86=)!6M z)BSk%#Ae=l#=leXS2bYQ5&N{yW46QJP4IUWyey@E8qbOiBhdcUs-=G?|b1@wXl3cY>!& z@ca%QuZPFWy?81=x2SNN3U`N|7M^d9`71md=bv>a;z|E}=AW&B=y~w(O89rZ7tuCe zM6s<6{S&+(3f>pdQ=WhRc}Fvk{hsdu{1rT_1w{AZbJOlVEc>W@Iegl8k z!QW+GJe8MQRJcuryW5@?okJ`6c`;(TVfqyYxJPj|N*w&2m9lYQP{%_%VQFD*|_Rj$R3Z6Rz&&}|+4F103#Z!5? 
zMTOf`xZ5}_JoE71{Q``ic)l=jIfA71R_8#x8JbjOb&haZonvav&ZPPSSoK3{UNYVz zO~kvS$#`!x74M9uVYgp$J--Xf{P?g&Qss%+N2u)BFPMzCPn+jXWp2zrnLl9B{!4A_ z3@ATP(E&^K&5iQ?xgEgUe9Y6K?~Bj~yf5pE32JqMeGi({^r*ZL|g39$QGvOU664jdoyH+3~g?-ohQ)_HcuP02i5HEzRsC6? zKt=m4^#puREE@0f`YV;&_DY)9Qt39Wo8I0h&-Xm&vPw!HKcY4R*BQ{Bgq&=aKmyDNz6Y&CYGG6{o#f!gb z*h!mw$ZvbDq&>VZ9MB#V@{;jV@u}JSE7t;h<`}YTI54_?$5U({S;g#kQc%6C7 zZ^(I2+7}9W$#@Ss5$`@Hv{kHEXC$k5I)3Ntgt_AeUr+#>A+i$h- zM)zOWMz^m&R03ha_HB-!-%>p7(+utA*s^^+EztyXPkw17Pg}5k|IBnIv5m2_Ir(^# z(NwNy@3Vc@puO>CcM{&}9`Q3u*#k>|NP9yeFBz|bC*t+*WW4&Fir2o=uz@|98zP%n zRO9oCF{n|oeUNT-MKhchNeEus}0(#-ozJZGNTk479vp*C9)t&9z95H`&^Nz30 z&~1(-+t-796O4V@m#yN#z65dA2A*_4Kkz-`k5}3U;x+apyuv;Ludk1(L3oU@P_;Tc$}HNPqUUnxD2&`JE$< zI+M@^j^ZQyp84&&Y%jd$KM?QsC*i&R5qPJ6%&)NYyQlr3k(Z3G0VLuJ0Lgg!KNWBO zr{UF+WPAbe#P(l7d-z^}e_Ke+OU4%k{MeE9|4UZ_d*N;WfJOT+)%T3%eAzDwr%-oK z`=5YdfVz6xzX{rZfvuv&HYN%+^HGMRE>v0rn>1sW7QEQ5-g}U0saLfdyBDt9_W)4}whZPUCjW#@(de?Vcsr6?*u7 zXl>rXEmR9;+o2*e^pipm#WhjyU2P#DMZ;N9yzB3YP3EFJE zvBP61$tZA~=k<#JPb3>Fc6r#BH6jd~m%^}q7JLsobRop{`Vk)`g z3&oV8m{Kf{EyeT7rJ##J7lW37mVoAi=6l^>N1KtK^L7&lqP{gpad+dD&AUxzNn`m9 z&9|pvBVjT&5~jstXJa;kq&%r*W&#!w*9XPJ;c1!ih*vPqS7>f9j$wQqZLVMqHCDY9(-O2_y_Xf zo2q-6s&8}9jRL+`*~EDd@JsqPKgT@hYa{;0QH>_s+2CFhoEk${vWWkEP4I8wgFoYf zKX#4@{)h?whzb6P3I2!){)n9d{@qUKJL>#`TXcLck=p=E3V*!s3H-q+{J$W#jpY&l zmE^d;P2sx|D4=5mPh>YE-ml}r|^G^++J`B|7Xb6fz$XGYy81${AYXdSM@y&{FmHva{P~> z8cnv#0e>s-zn%C`A^!I^!9Umsf5rv>U@!iN3I2!){)h?whzb6PodW*d{B$9WKONsg zr7$?XNF@PC?I9XO5u{ThGp8vlE}_^bM! 
z2L9b|J~{qJQH>_s>wy2Qz<)IHzl-?)s|o%g@u;ng|BMU%*vTgSA2GonF~J`(!5=Zf zAF)%w{|!G~NaIh(Ylz$iSW@`oeQe+lPT{|i+%}d+{PET{@CT>xe}mj!a0>sY$kl<< z_&YWJ;5GjL^5U=Rdm8vJx#{HiA3-&mY{vusgMj}i;y;=A-_r#DRzCPMF8E{To8XU_ z;E$N#kC@<(nBb4tDd6ADPZ!eo)A3)4+y+=u_~RXR;15pWzk%E~mPh>YraSNlr|^HB z++J`B|0l`Sfz$XGY5c)!{O|GNuj+dm_;UgLka7k^dX)4+d8-;?8Ck7_j8@CW?Oz<(t1 zpGf>?HNijB2YtRY-es!3r-6U>J}1Y& z4%KL~JrMXO0{>LvKY{qqY=VCqAN&~?{IL^H@JCGWM@;ZXOz=lc@JH+v@PETk7t;9C z@oFNs0hSd0__ze{2dD6_Ah(U>5r2Gw0{DYd`2UC8UT_Nk$H>)z)A%pY_=DH@tB+bJ z{8fEV1OFx0ogDu|s790Rgus6w@E<|^ahugOX+{(L+xprd$n6EE@PCwC z9XO5ue2qVNjlcR1h{9jh_cZYD-uvYEA4D~pY(E743Bdn0;-5kMbDQAb&IfBmN(d<9Zean_L|@jsHB2KX{G5`f`fGU)A?C@L$sNB=}<)Cg~5k=~ zkC?!ZSR?#_*MIzUA>IG!_%0&10hSd0_(l%!2dD5~O>P^@BmVf34)6!3@PCoqUT_Nk zhso7})A$!?{K0Gd)wg*R{;IyGf&Y@|ljHw8s?lUSB=GMK{D%_%JBk1FCir*s!Jl!# zA3F~Pf5ZfT!~}oD1b@T?f5c7!|89P|kj9^m?<8^?U`gSRZwdi_a0>r&a@$xQ@qd>b z_eU!HUm&*^oWg$*xjJwf|G64}@EU*hEhB}$s_$vw-~HN?=f{S!%r8|_|x$nL~a8tDg5yXCg2ZF z;a^5>8_Og9_)rt@2dD6Vp4?t=3jc@5)q&Ia&(ZjU*Z8YXKPmiGeNO}bCD#PPAItn5 z)o{y2cs$?;hEg;v}Ca2~iTLk{)(Z4+U zmq-8d=wBZF%Txa0n~i)tqx_rGWR?;2jX8=oKvW1AV<%-9YDL=c~ZLy&wzdDu~a=hI2t^NYO$*kkR- zACCLTtiEMOn{#PyE@O)rTg2E4jJ?1Z_ogJi%UC&M<&1sF*q4m$M4O+LISxD=Jj6K5GBaFSo*h`FYGhX8RjICj84P#$1_7!74 zBhdc*44Gel{=kL-G}j;8F~GhzpZ5h&R$o@6jd`>-kFkF<_HV{sX6$9gxcM{j1IE@e zwwAH48T*>C-3WB~yCL&i{y|;-L2voIFNCuCP9$y2r?vTvJ<8aljJ?9xD~xe-apE$@ z)-kq@v2PgrhOu7|=<lS3~Bv{5oBJowt0h3qe_ZzLPc<(%M4C9%t-v#$ILYRmS*! 
zL*jDA)-$%AvF{lBjxiSkUA_x4zvVaCpAYrVm6_ngUONVL1#{eO1%?@7-6 zJjdABeAa$`x>zy5c>hKH*m4ie*?UU(jWi6 z9>lltxe$cqfwzofCu7@5Y(KI6#2zK~C^5c3+w>)3FA{r^*vrITCUzVbEB)i(O_cul z_eCMTjn4%kEKl!c9J?6XE@Jl)yN}qXiG7+FU)FAVlGxXYeVy13iT#k+FL1Ha{{?ub zr~gjxBSU=~pA$n_o=eO)?`G_G6T6?-{lp$4_82jK9MJR>v2PIj2C*Lz`w_8U;$r3J zm*AbApOVjyfBzlg+diPuCuAJ=Ft&S$JwWUMVxJ-Q8DdWpdz#ociG7pUkBR-5*spN0 z(*G5B6Quuy^?`+cpMW|sYR8CeAhv;+mzbBBgP4O@C9z6k1~G$JDX~&ug+Qjr@xAc_ z_`CqgVj>_%cY5^E*aO6+~a-bbv0SOu~75_>PPVq9c;ykw_WXr+e}>{NQU zQRg=5^bzYLb_1~+h_w)FA-0OxDq`it%86Y->;htE;v&;K6Rhm?3as=BGU?q)om;6B zCKe`kJ+bSFH4|$lwvyONVvC6_CU!ot^NCHzMW#0$tnBpit@QAV)T(^%r_TGSvys?F zV%HJ7j@W8qtBGAf>x??+mcA)628c!(Ez6?-uIZLYUxm7Sj5O3$82Z!>i^Qzt|$MC=-3*AVj% z^AKB3Y&o%o#1;~J53%X>DjFG@DmuSd;`=OpiVEbUSgfZI*GZ7xrx;h zt0lI8*aBj+h|MBago{kC2&}C1%5a1*wyf0mSd`pEolV4gi1iR#Pi#H0CSpy*E+=+5 zu}g?uLhM{(=MtNOi%f3{Sef*8tjpU`JiI;+Wy{=O^i!vwI^D#&iLE2Hj#wkHMqj;v&=INRpMFo$1-L(@Rk&MV%nAAhET?))I3Oa}irgY$>sM#O4v3 zNo*#u0$gNz1z=^RXJdM{?DUe8NVhzMv5SeFL+l)4dAP{*^1#ZZ zhxyRfy`vbP=h(Wnea5L1r%o5KE@ExO+KAN=t0PuTteV(G#4aLsHnFpb*>RES*}=-B zSMvF??p*u4nL0O9$4AUZ>}q0H6LS)C600IsMQjeSImFH)b`~)kE;2nESYzouQMMfw zI_~|}_TfM5TK&hWk)hxHDL#8>RUm?!`&7Z`SpbKBinHDdn!wqSzwbKet#uFoALn}K zx_0*+#}7du;NFoxPoAJK>^k=RR{o>{xWaq&{?6gU*x@&X_rCp|%U?*IVFlU{c=dhP zbPhc!MN77PrvN(%yFRxPitgdhIRHu)6ufjH_H-7?*s4Z`xAF%Yh6}yzSGO@qQS$NS z_fb1_@$wJz(D>Bs<-2&~e(Id(r6|*I@Axp|8n$11&2^nStQZPDfD=V`6b=`>A8_bo ze#uS&QTHyHj(d136|5MT3X@ykyhC!ICS&+_hw=eRKK^&eok(8b92U;u&V1*vyU020 zoj&xuZ75xITcKxYr1XvN3_rGM3 zW%##4-?R_??82dRUg}2o@NeCRe`Vb9vvi3N>F1rpC!F6Jbq*bi)eYZj&*uxfhkw4< zdB-bhY{fa#hZ~2R?M3dxzg(7nu5LJKpWd)z&zYZbX>6W5PSG|?wz$D<+OhJ@hT)S#$EK8Qc??_>Je6_|p7Nxk&u_gR+BBl( z;SaMFRq2WDkYT44UmDpiX>eb?<0qm-l*nRXgB1;!x?uQ)!=uyZoj{EEiN>K*RFU)WugYA*x8_Z+-_iVLe%6A77o3MHJJHw-afRzxs@Q$UvmC^)bKdAQZ+K>O=WM(Ko|)de zck}#g&MikEGmIU&j=A`KhvvDp&BG0I@l$opbKP4;l9!h(Y?$jUS=c;xL&?I9xv`Rk z`{xcqp=9AF=W=JdWZ|Q84@mg<+=CK+Y3>N%D9-&7TYqr7$Gxk0uHhMe#WU=|$2dFZ z9s?hEk^2j!z$zt$L%S|`49Ao{#pfPh;Ke=s<1eyAp5c+vQP=s*4vuUn$U}g8=w!NN 
z>qn*1s;40_IuF&12`IVQGyK{Y2NCcEUYaq_3_mwoi?TSMYy1G|r|<|vjCQkh5Wng< zj;%{?HCSjI0)gK^Kc%YR+0m;hv};v60+EAfn8I+umYYxq&*jgp$PymCgpxZ}^&-qY zgb992KDxwn1?`}1V^wf-8w-poc#4%gj9<_?JbD{6QHj&7CB_vy=MEw$Rf1|~^@vhVTS+km$RO&ZzXRV{@(C!M`z)Y%hq3m4q%5gtAn_3N zrNnss&qESvTfbF!96x0EY|%5I-*xZSz`Hf@ZVkL!1Mk+r|F1P*<3ezp8@v*Coa6l3 zL@3f@1cLn`e{h8%;=x!vTH%kyM4vAd5iOxapD*d}HGD}U6bS?e3;}3_lEJty8Hz>> z;Tb~>f3z;x~JJkw7>YH~hX-PjAu)4)}vHswY;6X;X!_ zvA?w@vDqJ9Vy>&Is>y_!;ZP#!a*1~c#_#oBXE#Mc!L zmito4XnB}Qn_yt&1fz+!j2Yrm$x`7*2O!=WxC9=zG;%NEND zXJ9H65B3EkV>y=L8-wvkFl@wpabKSijp4Paj2k@5st?WJ@+#Y1sKQInvE;P16e*zt zTK{Y-+)!{t@ipyG{9{@^JCQ8h&L4?}o2BF`6?`zKowUk3DLXr?tb?YP?SxNww&#ymLx#b)BNSYMEa&RxPU+@jyHh5N!=@tqmf43pJXrr9y8wDBQ?z zJlQAOI$Ee%P1R~*ORK88NdPt~I&iaMG+ZrW$xU%|T;c6l-{$FXiPf$3tsdbEAnk@u zq?iZ>nPj`m)u6OP-F-f#?`dptF4d_7Auve$-4=9=X29fCXiFUVB^uu;Q$1)|{@skml93Ah)uVHON;iq4;JEvgU2qLH&_PNJnhetZQxc zWMa4KK2s&;I;#~>HCD7;p`#QM6{HJ;XW4KSc+q&w6jy!C5Z8$Fra#;Q3= z#S<9bGD&)?Y4&aHuKEsDsP0(RG9BFL1?~?Af_+h#NPTmIt7Q#r1g0AZ#X~gS22Yzu z(aUPO6lYmYo!I0HC1V?V#CoTv!@FANp$o>WBd>!^i(uqte_uejTCc8ekp`wK#FVw+ z>l-LfhG5>#md^4H4{V$q6r)yhkoQ{2vEqe{jC4~d5G>yq7VA9?uJYB*qBkB)1e3zu z=4y9!h!oW0Vl8yrL@XZc4q_$?32&RL(bZA!me#~fSc$Cblo>H)O8&l>-#_3B1TYYI z>+3t64KmBRLzwIiti?zW4bUHq%W5`TT_)OLovW=w#C>R~q-b-lVT3a0R&^uivaVS= zYraJP1G9cji?`X;$-V*`V|O7)^kH~u!Qj$Y1BfZBe?aBfs@+VML8cPHcwYkLZFjYq zE$y;vR#jG1l~*s7En!wwVqiiHlF8UHR!F8v!WSDrC-!zW3Pi%0S0IjNPkV>h#AqW4 zvAzwQ)^-u=iUc-x32$9%!|FP585Ruw%M!tG_hrFIe<&V}2*;!8bkSq!^yp{O=@K7jCEg!dx6 z7vU#Ag)oHgL-+u~I}u)s@LGh|BK!q}Z%249!rcg8fbb%O7a_a|;rkHYiSSy4&qDag z&qC*M_$S~GAq?TO5PlM6IC>CiA$&W+wS& zA3&VYABBGHClDv}k3xUJLx>alg+BuCRm1`R@J}HF|H4lY2mI(Okby6P-**f;@P~ek zIN^^%uNVH*57X%gbWcI=Zus}YKL*|R!G8mKH^ToCdbh(D!`~0x3izj>+Y0|{=m((x zYv^r(p8@|#=+?kL3*8R*SD}}L&xhXw-MR3OL$?9`b?9FU{nw#)3;Zn>y8-?L^akNe;SWH! 
z68#v>hW?kK+Xnv==*8jf@P7r}i{L*C-8%Sh zL$?e5U!iv={Mqn_puYh6hoI|*KMK8G_><7v4nGV2Vd&PvzX07%_)+K$z!$+k{2j>u z0KD%b4F2f90Ky+V3J8Dnp8?^Iz61z=^m|AL{?K=kF8uw_Z-D-@(5-=g9J)2|k3+Wx z{&DEmz&{S%8u-VdI~RTrbo1f&Kz{}FzX;t$@Q*-u5&R?2T?GFKbQi%t0^LRMk3jc4 z_`9I{d+6Q;UkUw#&^6!>K-Yjj09^zA0CWxb1JE_#4?wpReh~U6pgRaZANmhMw+wzS zbj#rPLbnWlFLcY`_d>S}elK(j;rpTg8g%>N&xihg=uU^<3Ek=NJE1!rekXLN!|#Od zboian{Udb4@GnC*41YHC{|dTx_&cF%hrbiLcKAD?YlpuRx_0-(h0KW?O zBJ2~~^g0XLI* zcAMN{0F<|O;OFxePtw^1(~1hpa27Pjq1<*OhxcDWBA>QFM9lJ!SWuRie3py+K?y~15b^!jrcCsDWKCst8Bu*&XkC5%B?U)c5w{OAs zW6};FZP|e(cZaa_0cAL$)&5pLAWeO?zt&^FAU)nLY;5;jm&bHrC-1sY?*8va?&0pwCv8#ac43Ixjbc`d71Iam`qxYQ&zlsJd@+t;{xY;6+t*xOF}B1r=` z$1z`zwO1aD$D?s$!Muc#NOdKWp=64C;0srX#x-p%&Suf(>R8*>Dw>+t)Pb7nSyFAx z3lA7`RuPD)?G__oF#=+0BL%u8*$we!HI!R1rb3izG9Um-s}hAe`C7fs7cj706paK7 z>}IF9%`6&y*bg&Y%=HSR-WM?!c(l(*1_zRcj1bW$qefRK z;)`!)TA61JVqB!FjLLy|0|u%LTQ|N4b1e7pnd3feZb2E*^hQw^O3zpzb7Ltk6dsmA zCMk>LkH+Jv7#?YeIEsYDpc(P?1rh%x^AeW`=FDOkhUTHgyG?AGfs^4iY=&0IPiv5c z@yCP6P{1mqF5Xy)##MYrZzw^<9`5gN@+D*|bfrRgxPp?zQxOA|#m0b)KbT0s#PEb> zb2Jq<5}WZ5qpw1AAe6PR2}x$85@=}IY!#v{6)Eosp;-03LH|ah(H9D*7)HH*Up&M| z9kRd%veFgg10!rU1&mFhWG`w(u`J67qiM0rXBmxAm+J1ubDt<0Rp!+1PsQWGz+$62 z8uufH;6Mo5ozkSBXmu0(P<>)^Usp72=*LFg$WSm)VVNRYJ`iN{wstf$Hye01lS1y; z(B07#H_embdS5Jw0>GFo`$R2EySAcwwx^qpj$sgwg3WmerJF8`y9K zVCAUSb;=m4D=U{6sM$zx6N-~SsbN4}L0JtIqCM&JZ(Q#SZ)E*o-?a}`%I5W9zc${7 zn%|t8cb9$wMvb^H#M%s`;;dPW8)$<~hz*oz5A{UQ3E8={;6yw=pM&AYM7yV{-PN>? 
z(9_&ZP`9?7pwa6RsmR7ibW=pMMh!U@iEE-Mqc4?68sSg`X`(rSb)_PkP$9G~w6Y<4 zW*76+t?JPi4kb6E)o@p|plzX1AB_hWbhIoq8dA}5=7l(D(`Z%NkCe|2m&qkkF+4Kl zpny`gC($oZc|AtEr=`Jg4J3mRjs*#0iLpRFWm$-pOC;kdt39b4#olWsD_+DCO7$^U zH3ptJzH|B+)wfN1ObSnW{`@?&yi*?jV>yR0$)}GP3WA&X$S>5V`VNO+1H0Ci@uC59 zD!rUfJVV_WiNK$OXx9Lk&I`o-Z_8eRQzdrcnVfgEsJkGWXW!~KSN(W zESp&8-#Iwuj)mTE&*Rx!+WX6+3g^*>PC>c4Q`Y=|7u*T!ez?s;Zg=I&{ z9q4Ep7)r4)B#5J)undBQb)y!uWaVUy83SW{c(V+`fQLyNE62>3lM~IvRrAi1H97){ z%gjzZW!L-s=mOTtjN=GbK6rkJY~h^{jIx~15=Lc(p^QBk>A||sT4?n8;(<+APO$ne zLz&jJ>mh=(2nHlPmcz`5_%TpmGQgUIF3)~3-s!Z7V%!GcwnnbLH za4Rm>#DbCf=88lzAfT5g^{AVNKim?D1KTKNIsN`Xr=o4M^R76KV4S8YN`G-GZ9wtF zuTG`ijNrtn^o#h`!T=mWM3sO?0UdxRa4&x1Ur(irAb0HFPNnm~A55Q0ZvgET>GY$3 zBY@upth8Y-7VrRIC*+RVu{Vu)PXO)&y*m&4=AcIao0(odo^b$f0Q@B25x^6Wt1Q6Y zGAh6USOe$Xg8>}*8uoVqPXIm(=>0aHN1TcBV&8Zr;BM?8-wH@K!vex~OPjC_l-g!bpHj3N z?N9#s@W;@$^C{CXMcx(91zH0?`ueH#F|?s5E^REHbydk3n~Da-mFHctXvy4*z?J&f z10H}4;r2J`jEok|}jx30Lfu6R~mu~ApNz*$^5RJgNXSN`33_t@!LX+iY(cHyGm&PH+dfx}V zJnAK`CTZnohu$SppOehi%PTnl~1PiMQu+bmmsO5Wfchse`y z??o8X_%8bWeJEi5s}3i1GwoEzrl@TwK4=@7va@hk!QJ`yxL z7`Zi61NkDoUKO7l zGc21?dE92dLE1QL(X#PdRXS%gf2eWT7c@LFd<1#lkML=T|Fdk!oN)s7S6A$)D|Xiv zd+Uljoy8lR#l6nrn6r4meoyhiDMKaOinpG*W%|xDc1^o`>ODnwPPwCSd%gRm!VQz;Pv>K@baZ$w)be&6kaw4@ctF-V z>K-l7DO&?=f|jYpP>?fVvoc*OPdW+>b=friPB2$Ktm4&q)MdXehwoGL^*MaMqJs+R zw$*$cug;fFU&pWce=h7&>F9X=vrw1E{(cp4ZbozhT2L2A^EI3-T*s&7a^VdsT^+AA z7H>djKRGp>=Evvk<9aL}0RK(!7n0v<^?C#7Zt(5X(&-lVdGPPFTcbkW6=xO~ec2i} z9*4}O=i(g^h%nAO?HlFD(V)ip?;_lZa5)Acd;|C#KaYdI27H+&MjFf`7@M}_-C_4~ z?P0XZSjNVu^QU4wgZx*{wbB`~=RJL{9G$GPtwgxE4Bwb@0#J_gOb7S}uS}=UA+M3? 
zSo200{7-?8&*-J2+8}r@f~Oy0QhyJ4N3{OxG5tru{~Gu+Ep<7kjVOJY1-B*Ji}0i1 zKMUETm}}M8YOS%Xxx13I2>GU%Tl`sJLp9H^o58k_2G{$FreF^;1 zr}17C*=Xk>d#`Mr1t7WZo{O-KLJ(-{ZT9*z3!8`JQYG(wg;?b}rx&8q<{~&B`EdXA zRQm6*K43gjwux)ByzA{s(m>E+$W?v??_U6?TvPEe+mPMJ(CwQFGA)P)_Z2o z!Z_H()l$Jux$w&~ZPMW6f(p|fYMyH(FI09ze>&dP@Qk$^EECOTV}7>T?Qa$vuv^3o zt8#&TLl%t&w5`penNPTM-cg(F>$WAOU!N(u&wXH~_@M2~J7?PV*lrWzL-rXTnJNC( zUUJV&@wmO@u9@O3d&%oF#MZpmkb^h!&;JR;3%&q+r0|TLW#VgvXMCtkyi_=QZ<%ZEGV#Hw)4o(DJ~S0``6s4MJ6t9{Jr!pDlc{r_D-%Q0ie4xaJErj)`e&xk z`D&SXb$Zc@W#ahs4?_Rn&Ybg&GO?$)=t!B^j~d1LsHE+cGV!C*>K~Vhe>ki3t}^k= zSwF{m;L(|9er2ZZ3o~DZ4F07{=Q(XvVi4q@t!S4mZ@UdW{c+oj`|aWd+j+==ts3Ya zJBE{w*$W=C7d&W}UBz}A8yoG8?;vnEz|;i=TWq5ICdqsE@@@^hTLbUbz`Hf@ZVmiT ztAWak9Wu+u=3B5-(Ov}|3La45|I^C%=j7K0q0Q=hIpWwx5Vu_7l&YNd$$MDQ+#(_U z8AVf;^b?B4_OPU%R5X{94F9^Ku{kg4A1NBQ7?M7&Xe`4eoxi{djQbfDeSxB}+>`mc zRMFEFeWjv#8^k@NR`kW7ZSeo5=(&pigQDjt z+P>IIkN1?6pQ>o?^^z`CH19n~&r&pRWk}Ca^djIa-wlesRMDe~=6x!|!{ts~g(!z7 z{c%O}UWfET{$wW{UWJvkPtlc%{+Obx6#YF#S1WqPWtRRDMTZn!qv)3v&3jdu-%3lK z_uQoKQS{}C{*0n)75yzm<5#Jqe07zjze3Tqiss%Vllba}O4=VVCg3l@Vl7g=(_?Ci&7pe3WoUh-9gIhvrnH;V5;Cv_}MzIZe9-HaBr)(3f_GtVJe zh6?BeoJd~oDPk1hBe`tHd5WV0AFE}%%Ox>W@X=Ye^MJ79?Et+#G*n<0piNvLt`1o8 zHpRagaH9Nd2cPlib*COb_se)n#fh`6cnr{Vhu|r%*P&Ye8{qRp5Pj~!6bi$Q!jrGh zL1>g*tS^u;^7Xk0I-m0tU!Uv1^$*=5#TPmGX;OTBj)NZOIu&1^|DeZ3Q}OkA5Sq{N zlKIi+Js2`ofvq-+ug`tXCgm>>jy@|MhmvmvpY15p7Dw*0A}p-p z_g-kp^IdtmJC(e?50QJV;2%)&?^f}vX-j+-{E71Oq>86zf^u^Pm-TSlcdY{xL9vSp*1euq6+Q?U$(Cr ze;_0s$1};l*^<}wbfe_Y5=YeeNypHy=t4^VlR5IYDE`BWulv_sI{v5?zfO0uac_^5 zpCOLr%==7xqI&+gl%I_kkW~B>ggYep?-9A{-oxMvagHce^93oo?}C4}y;!{ec88?( z{m0M1Kg)i;7*XpsU9LP#+-IYnPt3RE`Th`H5%?E@f3U_OX}$wQ*CgdFJo_3@@?x1o zvOl5%pPEGe2>8s;zMTBLsp8pPZN+m~#WNcj=3Ypys|26%9LcOlEeSreWVwVY*LCWj2w>yRY)b9gtU?8mKiAELayBy-=X*y7VN9!-tGr@?1FcjuJ% z2jE{cCYHh8nk1fDEI`>Vm1;iX{WskX@F%LD2PWbFqvRKhBc~ivpZgSa&ny0k?>Hp= zlA1`4fzNc0sP_1Er51$QBR1YoWBc49hz6(`ed%!QnypU@@iAm!5EcmnsK8K{!>$Go9BLDg%{69^? 
zKL;Bev|GnMhm8N2GQ|1hBmW1cSm}CfGFU8?@)n+b<@=@+)k7EfEbl=zzG#3Q;?J*fOV06y&`rtGs@#dQdL*4qYEZw|#jK1uxN;6`Jj{9g_J9F#zx zi^u&$x+M7QUoYhJuicZx^NC6LPfWr;7efi#Ww)Bg51wnKJ0E=JN1w}QSZr~Xlox_8 zYG%8ez^9$(+S|?G>oF%=ev6FfbbI(~%A>r8a>{#<^01SAs^954{Hly6J7x=iMez^h ztA5-6Y(#dME-X0X%7xHk7#@NF!;>>!#Vl?2gZZ^&sP0?sY?8xl>C7l z`JYgJf_C|hwS~(uOXlPQRP#+ zB^F=zyEe+Bo;MUYrTigz6~fQ>(Jrx^`n*HMA5--y->rkrKE>ay?jQC1{wc{|k zdkB2i+mW1lduoz+elQ82Fa0t9KkabHc$Znx3(@Y6C#x!=!ZdwdAv5Xg5!eDXtpj2~ z;!C2+(>O~{<6JIG>zjJh#E18$#~eH4*1Wbo*<5RyA)MBSFOj3&-QlP&P_ZeFPrZWi zifGqOJVKG1p%sB(!XFRClF>Nydqf49rc%I1l3C%W$)_R{YFLNOR#cuI7rxF+RPZw~ zY}8H~&Tn=Rh%c&QsG#iZnBC@3FvsskO`>ON9c}W9(eR>Gma(CXNv>qcOCP z)Tz$InCy!Q*lQw*Z{86Y2s$P+~%@q@~N0$xgZE)k^&=S@&fC? zNeY|^o;b^y;Os1GW>zUQOJx~`XLjRE2%K7gvm+9*tc+&`C9KBniU!!|%D@>@mJEBf zA9CGM9U-kw$%Ya>D~LuC!gu^Ut05;8-69(C2Zdz{I4+dKrK)&j>ohjf2U{fofW%=- zV5zpkcfj~GG$30=w_hw)(T-sqCzYBwFS%MGJZPDWHdm8a+uF9Kx!Ki#lU7?K?FrE1XCzg!!SQbHJ2Q<4<-8ix%&OQvT(!n&sqf@2n z^E+G_)`#y^F_>T^z_|p`9#ksIE%i;avCY}yGF`0=rkNQZ)-*Qaz*)1yiNmD9ZMde@ z+2W~3)i$-RHC=9{<8Ejht8H0Tu;8o)kQI%yGR!2;vE?Bp)+y}fwPWX_FAD^dI1Zy~ zdB8eh-N^&i-PZZ)uGV%O@tS*pI+Dauwq_U3G>@=A{$59YXQ#8yV^(1#N8Ihns>{tr zb-05*tKIYVr>=Y3mzb{QW(%sVKe{pKYFpi&Ty8oen@x`DYuc+#)&7nqoR(8>R##MU ziqK}l6VuTQ*5J%Ncut4Itu^kbEadr@WTpG-#8G#k8jk+XVmOZL}btCHvPRDGhtTQW!tu+ zX*1O=PfO1lO<2-QbLX0^YUMwpDrklB6Z7uVE5d4LJ<(zOKxVop%`#2b#Q7^YQe@0j zIikS;Bja*gU5R$y@O4_0>$FWL0hz6ZRcF-kUc+sNK z9qE{8p7ysN*T>9+lWjPP`-rj02sPvHjgPAtNa57)ajes` zSYJJ6HmYB=Cu?2^@?xzBFTrjWafX6y}suKRRQl%`3uOE6-Fi+ky$4t>)leEMDGWYoa1T z(S#hIYdlz$HB?~~lXK>nm93t1lw2xjbvYj*rpx85RaI40jp07zheh*5DUTl`NR=OF zbeK5HDU?ij{H}H%E^_|1jv{ll^^KdmDypj{Fg44#^*AC&%hP6X9V(YMvfJUdC^Pot z=wzBwb!T)@F4j)BDD6{Ee_RG@=P+*1qj8tOXbHVUPRnuGgAP8yqGwz#trT$+ z7474lGjEdAhyn}atsX`ioSMRc!ZN(9MX_ZQ^5$FK<@4~CIMpF6xtW_li^nYCY%m;q zmXlYNA^?sO%o^CxifBU|GskY|3j_lhJ^WG$>}Sk2tl^-llH>e4M`PV!^y(sl8a2R7{;g}W7sE>8Z)c2f)n8RKG@>HLQp3wb2D~-#=83tubAbstQ5WVxFpy8A!k7zNJQ102P%HD1sbZM#-ACW_4ugT z(OO+*=7h2FLYeRQ!HbQ`o4~iVf=p;uhvyKbQq;>z$;zIWgZA{2npTs~iz7aA>u`6K92|rjw+gYCJ 
z__Zelw$vT$*{-prZwFJBd3442!3geXOq8Be9+~`D}VG$FVN*vDIUoCFoOAbv2%!q~6lF;Riz9)lIQ-Ko&)ZNHWs9ai@W<5`aHL+9ByvEsJ)~#)Az+QozBycRB{6VscWDm~rL&k!=YD=#-fCM## zocR6YcE7EJt&Kq|J&iL><`l`xQt|jv=tq;xBsd5b67rjB}QHpXhu^ja?|4?Jfp^S6edrwMw7oX3-h)I(E^|#WaV{53=<; zm44?W`ucf>)^7mv7T)#O`5#dF14@5H3Ch>Qfaj*)3Lc;NX?^|ugZ^&eM4=S{fel{k z>*tN@z++NcUw==*^R?*o_Y)Rn#i4P+8{zRLTPFYddxe+ryo_#-1%%GOPDjIy2WB0?#_t`uh8eK|JT7)8Aj{^tHY&{}#wlzmCFi`g@Mi zGK<#rNPXH`uKwNNv0ha*!(bes;0$?{Iu>SKnO3$N29d3AD1F1Qq>kx=DJA{9O+(gi zw!XO7VM$(W0i9oq8q2@t--m!~{Zgf0s`UTm|7cAM>U1^t6FK_&d!OTcu^bMsZH#ld z<^MZ`QZiFa_1sT4{Qir^sq@L;IQRFUvi0?Q-3O|z^gG8%^Bs@z?t2-1!wS&v?JcqN zbLDgCA7=C|b^YF7snS1gMPmixnp^%ia`eSgrzN}8QhZhQLmh!`H%@T$Yo zuY8B}ryv}!K*+wuIQ92gN8Fa8L1vcI>1%in#PCW-MqhuwbwcUq)|b}P@IvS_CH8IU z)cYSoy-0O1r^0gMTMT7AHc(%`k9n|#v7)_|Vd?&<)7A0pgG_e%Bc@a8jjXZs9g-Tm zXnhUYOxgOG_e8yx{-6ai5okRP- literal 0 HcmV?d00001 diff --git a/repro.cpp b/repro.cpp new file mode 100644 index 000000000000..5a7a140a8cf0 --- /dev/null +++ b/repro.cpp @@ -0,0 +1,231 @@ +#include +#include +#include + +using namespace sycl; + +int main() { + constexpr size_t size = 1024; + constexpr int num_iters = 5000; + + for (int iter = 0; iter < num_iters; ++iter) { + queue q{default_selector{}}; + + if (iter % 100 == 0) { + std::cout << "Using device: " << q.get_device().get_info() + << " | Iteration: " << iter << "\n"; + } + + float* device_data = malloc_device(size, q); + + std::vector host_input(size, 1.0f); + std::vector host_output1(size, 0.0f); + std::vector host_output2(size, 0.0f); + + + q.memcpy(device_data, host_input.data(), size * sizeof(float)).wait(); + + q.memcpy(host_output1.data(), device_data, size * sizeof(float)).wait(); + q.memcpy(host_output2.data(), device_data, size * sizeof(float)).wait(); + + for (size_t i = 0; i < size; ++i) { + if (host_output1[i] != 1.0f || host_output2[i] != 1.0f) { + std::cerr << "Mismatch at index " << i << " in iteration " << iter << "\n"; + break; + } + } + + free(device_data, q); + } + + return 0; +} + + + 
+#include +#include +#include + +using namespace sycl; + +int main() { + constexpr size_t size = 1024; + constexpr int num_iters = 5000; + + for (int iter = 0; iter < num_iters; ++iter) { + queue q{default_selector{}}; + + float* device_data = malloc_device(size, q); + + std::vector host_input(size, 1.0f); + std::vector host_output1(size, 0.0f); + std::vector host_output2(size, 0.0f); + + event e1 = q.memcpy(device_data, host_input.data(), size * sizeof(float)); + + event e2 = q.memcpy(host_output1.data(), device_data, size * sizeof(float), {e1}); + + event kernel_event = q.submit([&](handler& h) { + h.depends_on(e2); + h.single_task([]() {}); + }); + + event e3 = q.memcpy(host_output2.data(), device_data, size * sizeof(float), {kernel_event}); + + e3.wait(); + + for (size_t i = 0; i < size; ++i) { + if (host_output1[i] != 1.0f || host_output2[i] != 1.0f) { + std::cerr << "Mismatch at index " << i << " in iteration " << iter << "\n"; + break; + } + } + + if (iter % 100 == 0) { + std::cout << "Iteration " << iter << " completed\n"; + } + + free(device_data, q); + } + + return 0; +} + +// dpcpp -fsycl -I"%CONDA_PREFIX%\include" -o repro.exe .\rep.cpp + + + +#include +#include +#include + +using namespace sycl; + +int main() { + constexpr size_t size = 1024; + constexpr int num_iters = 5000; + + for (int iter = 0; iter < num_iters; ++iter) { + queue q{default_selector{}}; + + float* device_data = malloc_device(size, q); + int* check_flags = malloc_device(size, q); + + std::vector host_input(size, 1.0f); + std::vector host_output1(size, 0.0f); + std::vector host_output2(size, 0.0f); + + event e1 = q.memcpy(device_data, host_input.data(), size * sizeof(float)); + + event e2 = q.memcpy(host_output1.data(), device_data, size * sizeof(float), {e1}); + + event kernel_event = q.submit([&](handler& h) { + h.depends_on(e2); + h.parallel_for(range<1>(size), [=](id<1> i) { + check_flags[i] = (device_data[i] == 1.0f) ? 
1 : 0; + }); + }); + + event e3 = q.memcpy(host_output2.data(), device_data, size * sizeof(float), {kernel_event}); + + e3.wait(); + + for (size_t i = 0; i < size; ++i) { + if (host_output1[i] != 1.0f || host_output2[i] != 1.0f) { + std::cerr << "Mismatch at index " << i << " in iteration " << iter << "\n"; + break; + } + } + + if (iter % 100 == 0) { + std::cout << "Iteration " << iter << " completed\n"; + } + + free(device_data, q); + free(check_flags, q); + } + + return 0; +} + + + +#include +#include +#include + +using namespace sycl; + +int main() { + constexpr size_t size = 1024; + constexpr int num_iters = 5000; + + for (int iter = 0; iter < num_iters; ++iter) { + try { + queue q{default_selector{}}; + + device dev = q.get_device(); + if (!dev.has(aspect::usm_device_allocations)) { + std::cerr << "Device does not support USM device allocations. Iteration: " << iter << std::endl; + break; + } + + float* device_data = malloc_device(size, q); + int* check_flags = malloc_device(size, q); + + if (!device_data || !check_flags) { + std::cerr << "Memory allocation failed on iteration " << iter << std::endl; + break; + } + + std::vector host_input(size, 1.0f); + std::vector host_output1(size, 0.0f); + std::vector host_output2(size, 0.0f); + + event e1 = q.memcpy(device_data, host_input.data(), size * sizeof(float)); + + event e2 = q.memcpy(host_output1.data(), device_data, size * sizeof(float), {e1}); + + event kernel_event = q.submit([&](handler& h) { + h.depends_on(e2); + h.parallel_for(range<1>(size), [=](id<1> i) { + check_flags[i] = (device_data[i] == 1.0f) ? 
1 : 0; + }); + }); + + event e3 = q.memcpy(host_output2.data(), device_data, size * sizeof(float), {kernel_event}); + + e3.wait(); + q.wait_and_throw(); + + for (size_t i = 0; i < size; ++i) { + if (host_output1[i] != 1.0f || host_output2[i] != 1.0f) { + std::cerr << "Mismatch at index " << i << " in iteration " << iter << std::endl; + break; + } + } + + if (iter % 100 == 0) { + std::cout << "Iteration " << iter << " completed" << std::endl; + } + + free(device_data, q); + free(check_flags, q); + } + catch (sycl::exception const& e) { + std::cerr << "SYCL exception on iteration " << iter << ": " << e.what() << std::endl; + break; + } + catch (std::exception const& e) { + std::cerr << "Standard exception on iteration " << iter << ": " << e.what() << std::endl; + break; + } + catch (...) { + std::cerr << "Unknown error on iteration " << iter << std::endl; + break; + } + } + + return 0; +} diff --git a/test.py b/test.py new file mode 100644 index 000000000000..0264454fc00a --- /dev/null +++ b/test.py @@ -0,0 +1,22 @@ +import dpnp as np +import numpy + + +# x = np.array([2.5,3.0,3.4,3.7], dtype='f8') +# xp = np.array([1, 2, 3], dtype='f8') +# fp = np.array([3 ,2 ,0], dtype='int32') + +x = np.asarray([0, 1, 2, 4, 6, 8, 9, 10], dtype='f8') +xp = np.asarray([1, 3, 5, 7, 9], dtype='f8') +fp = np.sin(xp).astype('f8') + +left=10 +right=20 + +res = np.interp(x,xp,fp,left,right) + +res_np = numpy.interp(np.asnumpy(x), np.asnumpy(xp), np.asnumpy(fp), left, right) + +print(res) +print("numpy") +print(res_np) diff --git a/test_cpp b/test_cpp new file mode 100755 index 0000000000000000000000000000000000000000..6adf3e7060dabea014b06d1b89ac26eac8c2eb63 GIT binary patch literal 101704 zcmeFa30#!b{y#p$qPXCLrD>%}g$1Psm8GROMnRsjL2yYKGtpE`GxaK{bxW`T^(sOoHN6KGpT*=eSKg5Z@*E_XE~qG z=X}oloM%7tOm4DmK#19F@_s^0e=<=jZSAgJt1$d| z?GhE|rQ1VLiI0~?NYNAuO23X|1Wjy2I|<2T@=|Z>r1Wd}Bq~18N{|x;>HVc3CNK5c`P!?I^`;tW@1@J^Qtzeye#B;sU1mOJl0_JLX{-VwzJv7l 
z`VlVr72hTOitm$tUTRW!dZ`yrV#2>(3eR4liI6Bu8chM8Xi=I>P5m;IUxu>xQm?(2 zda)n6H+%f=X)j&*b?a*unRM%GX+&R;YLl0G+Zzl&;&QGZ?=TNh?FAc$#VV+ms^(4C zO__A}O*dXQWqkK3lcwg(={~2|P2F$0vB%75J+2oiFfX>jIh8W#4pT^;shB#izqg(; z>-izX-r*yEIsRBLQpkJUbh+7NrjDO0@V9ls=KI>bv9MLUXIuB5W4-s|xa6;FqB20S z1%WOIpnn9qK>B1P5=dVeBu=X!_S=HQzc7gXtRV4U3ZkzLqVE|b{;(kW%Y*2b2g(1j zAnpDsh<;BH{oO&@Kb%^u=(vB0&7NMf^6Vs4j`3Hd5I<4SieFrKaLPTZASGGC#BN z$9@&JO+?x7$plM%RTI4_Gjrm7)23$5%pN-crEhYX!I4B=3b-D}dcnVENwojKt~ z>(oivNeRib*y@})nbRlCm^p3g*eR2;@6VhSFN~+%Fg+(bc^vuMhi3O2mwo^A3D$AR z2{$14>`CJ%OhpruvKgUClT#X+9GZQ7d}e0$Ju{}w&YUt~>cs4OGAGQKF>OZr?TkA# zJK?6x%xN>TXG|D-pEu!g&?bz#XY7p3>=|PxWzR&D=s|YE4M=;^IIp#JSmvg-8V$7c?iFf->qj7jFWIb$=kCK1I4C{CX-X=-*>=D2(A zHKB{+?wMg4H#U3XgzTB9!!co;8tMDSGFE1mpE>NNO`kw59H)+(e!mDJ8yK5CVa}v% z6XHym%F&xObE==`%=>4K8#`qR8pO<*VY+X^eegET^crJsjk}k@GPA}`nqop0FdTR7 zwLBW#MDac!H*p*iq?ADYIH^zR-#_z)9`UCB85x#-*35(+H<(gv)_xG7zOhF_f?rLK z-i_)SNwOyM`W`p>xP%7wh6eWf9zA^)Xn|W%2#$VX;$JBEa7fKSn?plB0<;CVmdb*b zp^!sNW>ahYnIW5@X@x)eq3#jvv%LG2^sDY~WrFRi(D7r2_e@nk26Dx__u^39%Ji1f zjjekITuaj@O1^*VyYP=Nty8j*%RO$msY1!waZlisDb%zJGCz#}ysD$zZqg*|T3VQX zQo5%e|0@pb%}upR{!qy1GdjSWb1s?Gf@f@zO3uERa){|-)xqLva=*lLgUi`~`mFcU zxAZDO%l-7N{q)=Y^d0^5Ret);etNf`zKfr}#!nyZr}yqZc^-77pT6UCnSXQW)UH+3 zM*Ha%ltLftr&kaNeY~IEdoIX|UVeIUUN5XorbIuzIImKA-A~V)yq|PGz4x4$bs2to z42r}rLXSUKHKuP#l5&m2F!=8Ix7<@xD3PrRRnetQ4o_aZ;N|MB=) zKmCQ?AfSu=^zHogulwoS`{|eZ=`ZrrFZ0uTFJV|+?5Drj&wjn1{t`cZxu5=0KmB$; zz4scB)m47_7(aWrpZ+pGeT|>KtDpY=u@ZqJ+GQtn$AJjl8NMUajLZDu?2!6$-LW=e zjWDS1xf|;Gu0NpEa1K@9mGV8T^HkO2({(3hZsMMDA#bD1P1{o}&X-H8`o+=@aq|8my zQ!eBol({K-iiJFoGB-ibQX%)F%uUa;Sje|f<|gM^B;@NUb5rx=3Hchz+{8T7g?t5N z?#ex5gnS8QZfc%%Azwh5o0un2$So;z)AGa%c_Z#DWG#X$KIyBs&ajx=1?bsop4+aX zxFP~)#Tg=B&W}5EW3OfSP)w6(N8e+UFxENY?$GrnlcR4XrR=_rzTc41@&}_Fx(tO$ zd)c_F+_lyUf~(s!J@ZI6acI{)ap3<%Fr(leB`@ zgs^>_B{fMacuodJX*!pnNzR-~i|b$qi{nSL%qQ<$+~z%0j+vULJ$#L9j>5V4X`dwT zbXB#~jqtS?jje7lYrtS5q^!H#A<%39>)DD9pvTT|2ez?-VlDp-D4dZ7B<?c~>-9k6o z-v(Bu)fb%M+Ui05ErNArdYhChSXySZL_{nN)9jzO?b;l1i>BEu}iW4^IWo5-3 
zu={FXG16@MC98+lC7<0_`o&!nZ z0A!8sI(VDzsteW4>-4Rsv)duS(<(qjeN~p$CCOy+`ZK@0zIPCyoNMpU9er?s+&p8mv=PNwTHEG& zS&BG$(4E)VoHHV9y^m`7ypm5&s81*c9=h>hOq6b9$3)vubgYg?k_wNwO}gu3s0~15 z>qbZ2Q5>NgDG{kgsqQG1fjTl_XB*u)JVKA#sJl*t>c%F$VB6d^hqb6=^koy^uWlpz zZ8J8e7HpXNx^=7Pa6c`o^`5erK*4nLB^VaGezV>ZKQcOf@T$#SC z4xPj&*V?;G%ja#HRO7VObvo4QIBm88bVs>qP*F_0zFgYXYu4VM+qC|>Y~~u9)^As8 z!4KIN3@qxim!s?sKT%(AHA<^PlM{Zr-57P$lv;31caDmQnA^r?{Hz<_*^CqJ_-J&j z;9Hv$kYu;gjqh#7Y4^9s>+5eX3cF4q;S{i?M5GlxxYuMea-&iWw+{1Y&v)3jBDT%A zPB-c>qhs&1jI!Ki8EwhDYmM$ipLC=7YDZ0?$=wnGt;Sj^qjaZ@H4)X3&{$V{yR17N zjEOL1N9t%Vf=LXAkImTUz6{;S;>S0C8;$=x+B>6SqUYISq7(D52YzQ*O!VqRcE6cu zvrGrL(dY;^)ufnsEh@a(c9Y41`2x0;WK*KRYl@G~f^Y$ZEEuK4(<^p$ECYUoR4JzG zMv3kyiPeo^F=JpH4r8rtViX-c-@s98YtlJ*_*@~H#kA9-X`Z2%g~d>B{meUp$q7H` zWx$GBV)-DZ=2eaRdl;F7#aMBE!uhB>evQ@gcfef0iTM{6>+X%cut43ro|&;WbKT5qQwx5|?t$s^6LUGN zwMAC$;@bLpGyzsO)>L0$GdwVz@s-v10@gX(628R@ZyRYS>=QG$jhsO?UIM;``Es~FFxrQWuvja678~;I&ZNC! zS2sX+wvIvX#zu2eVgP$sqH~&Q&c6^`M_pKMj?lv*+f|3!bvc(k&`J4>chyx7m)pDU z3}ck&YQ$?tZd6Gf%GM=WTI|-t+NcT$-CPj%Liq8pHu_8ORg$;wKCETzLF^y2!f@oq z^G!lJey?dYuQOM@i0x+u&mG)3$AAKLHseS4_M-%1&8o$$4r9JXnR_TKWM?r9?vEhR zdeZi^VfW}1wmJ2Bl7-8Sckp*1kXu0+ef z0B5+Cyt%!w)mpTV2lTJ*(^Qx zry{h1k=T<|>qNy<4;Ijr9@gyTfMzR{C5N?=l28EWQUyTtjY1a3Mz#dBD9A2fv_NJbCBi|@b@3?0$S^40@N!XnJw3t+S=k3IyO8HOu2 zUMCpA+|ti*+l&sfHSFlZhXixG_zbsalv_u*T^h`7H;fu5IYzl*28aJJEJ)wpJi{$c zxy8cm?O<-%XSf|cBKsB(w^_m5`kvvoQn~el+ik(znw{a64>x#3dOe)aF2#vDj#gU! 
zGZ>@1zhLGy(?0IsALm?{Srddg4$R;CO*uKZ0}snd_de*VL@~ZUyN^QPE)N5h9i!Xg z4&tcjuA(LOWYpnMf7-o;F5Lg~z!c>!_C~;d&HVucoLD&4-s5-Vb4MZ%dDy#|Y)mCuT6RR=F*4PZ}v&H_AN8qUH9@Qa2?@KZrA&{h~ZJg;%49-jo>Uq>u z=|)2<(#4kNX8R%%lWj(Oz_b~(Vc*F8jkq2zD^A4rP#=x+F3LQ$iqgy0B+j&$oS`@^ zyv0%1YB?_{HQRc9oJ%i?>^YFrt+h)pE1_#bagDCM*Ai!ypJEp-dB@tQJL|7dhmxLf z#)g5tJ_F{wTm6&Y`K#WfeWPxT@BE1Q;94wQFLRlY&I;@%*_!rV+nfw***d0Zas0`Y zb2|*iK+v6LoXF-hcaOu}kLp9Z;nHiju4L}|3KVz%kHibM=3EIh79rQ(+l?LQm7^|1 zvu#3Pi}qd{-VMMB`WnLpSMI@TU8d+4Oxd5Wa(xNz zW}0$dU&-Cd65a#Md0{0l^Ye@6Jh_s4>DLo;3Rm*R!0XXD^Hy?C^LmI@kc}i(vR9tT z;1&t|1!y%{T4`};R@eI6tnTsgS$#{Rv;{+8cto2Ud29>ev~#!D{e(HZ+G{}Rz$S~4e)oJ~Y>x)tb0K)>-EhPy zcSc48ge?%z3J;$dN5S92ysFtOlsa=c491PW@9_sTGB)9FbC%Zr7_8Tl$Dhkni;>B_ z!Q~Fi>icW!oJgb*9*5>U-$M_>&M`8N?TCy&d*@E9EJ39hovt;xCR1M)TK;oVxjuB> zE-L1E#@uEyxjfD1?K)MD6SShvhrR|e@5s`%xy4D&!R?ZqmX1aVE`HndZcLub-74AG zYSZ4`hQIarD|Y@l%6Wh5WTV3B+8>>2-kNG&lWc6ax~d`Uuo#DR?cKw$E4LUY!JaUV z;KD50YTl8AD-xFn+~HI+u2=R%TU%_0-S#A>HNyGlFj%{OM(xfd8QE$+nQE@E8Czhp z5rOKGjg9n>GS4=4|FIHuLvZ_%&V@)MiI9I*G0xgM@h8fxLJFIj~Z%s zI9#1A6}ys*EtYw^uyJ&;U_->?1g(HiJuDTTm5@wU*RJT)7CWqt;~0c+RHSO3?O2Wz zP*YOe4(-wI&{!*W*^F{)#cuOP22Io}+*bT=;Yw{$a@eX}&sJ^u)f_^<$LlbQf^{hS7S^64%e2 z<+QW!G}k87LUqW%xnaf;81VE48_Fos!;m9mK9^fG`0labLA%C-2H2ue)p>$QBDQ^J z=_a;bS5@e|64R_s^sO?NU}NS+-U=4jqT7LR+ZG!&PfW2T*&|O^_-)wOpo`(kjT+}z zVq3=9*BRR~5Q&X~P1Ego6gIBdo}Y?{fw$?UvM!q2^oQX8Tif)N*h@M}5>t(>sYZ#l ztXRwvPLG0Jzt-0`)CkpPsN^NZh-01^I04o2Jy6x|uvYB18SAYTo;dHMsj{{x@q8{T z)uiLZ+9IcAUe0jul*EP*>bVz&N)cUAt!62=#(8B(D4GW}e5_?_NNjPg)Qc-@7vomK z`c!Om#*UTTOgvY~Ai9|8=$1*Zs6Ho8rXU;g?8X%o#x*I1JK69X1n^%c#nj!&`z(IP z8(g6O0=vkIVFB8n6bI4z-0mh*e0*P%iANx=vp^VezD!p7K>^gn;p=^@S??d#D( z_v^ufcD+TM6`rv*qpI^Xkwk3!tEJm-9Cg~m0!N+8K+ZhsM2K@LbvpSUIqIy#sTKB0 z0na2?$hv4Abyk7@Z*9~6@2GcXbc;pHdP_#Ixp`}hC!@bZ?7aBhc}$AqywygUj1zcm$q#H9!? 
zYWxl_H=;SuX6U&cUnleM9bT?x<$I_+Lp^q;z@`RQAEcp?4IA70XMGRKoe%(?bYiWa+sm{6rTE=GS+uBjlqJBDBNFoslMK@#nhp47|N4T zk;ieivv&9KC6RSx5OafLS8nN%jK~^MacFt?&tOWA*gKQDc7=oO%JVeb6kQr=F(F3l zmMEsQj3_-h>PYFPQD1_dF0HngfInQiX4JQ!Kft2rDiQJclAfP2;>oDKq-?wIr&R38 zs4P7myKC)k&yu~=_I!uhp`+H8Zn7VR_HB=_T}m%IwmPG&?IZziJqODjel0xbm!r@8Kdr#>&`({81 zRF5wiZ;nFRuC;e>SiT0n%g38L0@2UmuzY=KMTX0Z+-o&J4h>!;f*fD6{_Z2ccI+B? z8N|BMTe9en9UQ%uT z+yG5&`Rh~3!wAcnP+?kxXDB;*nYo-IA0p>w@zVPe8_8n|8q4wTHkQW>^^-Z?i23WS zyp4|KIjs>BYd(C>=eay%sE!+p#xW}%JaoowJFJu040ApVHje4WPTX3zWWq<>>rS6+ z&Txk_G2C=tV;O20Vi{~nA2Qgx%wzb7JBI4>4)AMUNm|^eG0~FwFeXYeZ^dwlG^$>W zslh=J%nLD9l6fkoTr!Wu6iX&QW+@nkSuE&>F^dF!D<)6SS7W9N`a;YYL7$3A7xa;s zL_zao;z6-A#Mm=A%7PU)-zOa7LfXP>eNV zNim&_Q868{r)h^Z0Z~}^&8G$%9e3>xnOLY#aM4SR$|V+P&cY3huGaV)n*mX-I5Eo-7hLa?d}^?MHwnA z+KSyBW{R3HyZd5Q^R}w#s6DKsMHp3GCXFtzyMMV@_J!w!bniGLY8fAVBy2#xwisVq z4BUBMUt6BFB*L5{BDb@?HPWyP&__heOZnzM4nzDs3AAG>?7s@N+l zI?0N@c6TpT@wu$%EGrW2?k=k015xo~J6Ulf`mZW@Cth3eqe8To-Tf2xE}YWyP_c~b zIywHw)~Erz)b9RV8HjCY#l8;O!~HQ}VyZ0nPTC5KQG&^(`kG>QKck$kMbyM&>lB>2 z-91-TT!0D;?VdI=L#y3AUe(kgXmY-q=C@=1D}5F8*f1jGkPgE9R~2Qbz=rXy9GW}q z?n_j~JM=&s?PVJocDG5@JSA&(E2!-l|BKZ4s~R;KN233#W(;bMtqGCM-)VP$sPwvO zGj2W@AWyYto0i$g&{Vh zqWbx~zR_rtD`RiK)}fm>abaL>xo+OB8^?9W@ln~^Z1*1OnC8rG7KIff*>PBlhecnh z=2O`h4k(PdHMMA(Ipvm^KcyN!XYZ?SWi=n`pZDMmW)oVicU7-Kgpe7Fbfet;P&#ZiR+HyJ98R>@uox{a+|kp_J!-7nCu^~4A04aq*|BOL9;^20v1%V5tM>V^@)(fB z080Tl81G>MA2&5qLVIDo)^Z zpHoQfnOb1}tC}iRqc*bEcK7wFW}T{0bGD`3-BH!NrE1iEpq1Ty`a+rX6RJiXG$QTp zA5_g8)*LHV6TOYyy-8JMDi_sHJW_g3)g+^)>9GUbU1!ZZb*xlEux;3ct+@tge+Ck~xgnn6! 
z3W}84%8?jucOSh#0%5=c6&GlZez>Awt|RR3?W%GL!(OBG2wSZ5!=T50(p+wo)NZRS z;dh{OI@1?6FDRRM5%@9V=<36;wAQI7?ZzgcZlTllE|`+Iy(Id0XuNmC;^g zfTL}+Rn?d5j&997d= z)u;{R3cGu>syU3NnYJ3ROYH6>RkK~ys5627!bSyhBf4|QskV0UMziUU|Mi({TTeBOrhA5~F-3LM7O z)_aTHeU++MAuA#@$6U3E^|rfPsLB^n>B)i~D9yc?%bn^%vfE-|9HcB_mao3V0m2_49%YFsmA8e z59fcH1o(2OFrd5Dn55zSPt`P+HG9-4R65T8RLxIF6}?nDoc?xqqN?~(R$QbxUd0Sz zOG$S3m8$X+RC*p!`Y5}*mD0Z?oc5_b>veYbVO*PWLzpisTFNOMZ+Gud75AY6@P}ym z<5jpJ7=Kln&Sq>%-_h=VRq1;PC+`W=K)c(iD!Q94lCYa%5A`P6Sj zUwA#3jxQ_p;|eog!cmdeH>?NG4O*tVan2(y?Xc$f-r|l}Id&3v@-gZWYx4Sj&zt&M z!EJ~+cye?GD!*f8-`8OKKlUEfVmqfIGNm^``_D@Uc z1D`$4rf4-!>eooCIa2zkuv&?dS5C0vod|U7BWX2SO3w(Zw^4GD8LV!T`hQ8Qo26t3 ztCvuU<-0}UU8VjxX~mlji2N5}^*BndWJ9b-sdq}NFe%+9tQ;uCQn{k=BVP;ehon`t zlrn`?4oY73Bi0V7pC+xgNolaKnurnuODmVukCj%B$)W5otn4T;)o69M)DM+bb}97~ zR=1(LYKD}q7FK;xV#}q~XsN$hTHPU~i-py7D6wy+RX?eZlUBD#sgK^@OtPW zeJQ0~!m1V}&UC3Sk@`c@YMGQOgp~*7Mbhe3sox{5o|jUou=*Bd2Wd55>c5g!4@v10 zVf6*dOQqFhsV|dOjS}}Afa5FRI!W)2(&9yF-$qKgKKstn>cp1<$H%1A5h?vySS>)wFT^5~-BLeK zT7562agZF552CCu3oitTmd0Pm>U?RuUP@Vz%ED=UxwKj?^|Peahf>NAR#Q=4A+7!? 
z_4i1t#Znq5tj3_cQd&JO^&_R#0x9(oRzpzoBov*PDfKpKHAzZ6gjEvCuF~obssEF- zN|#c!u(}E5Rnp2L_3_fGkCZMDRrnrB!FCZzrufN$J?N z#H%%QankAnsc$B&+DgeStWKl6Mp`wO`Z{RP;}%ldDXeNxUMsCaq<+7&GE3=mVYM4& zH)&N{A*M#9v^pxKRl@2^l-;G(eyQIqt#(OisjymyvWK+VEcM0Gs#HoZ3#(-)uaj2q zOa1%O>Rl;4C9K{+8859~mikwv)k{*EFRWfbnINs6l={C)t3oNw5mt|)yk1((mimR# zYMPWL3#&YoH%P0yr2YYEHB?HYgw+g`H%hD9q<)IDx>-ttgw=SIJ*8DwsUIz^E|pTE zuo{l?CTZ1N>eHoFh?K4uRyxX?rPYybVubohtDmG4Bdl&g*-KjeTk3C=R=cHik+AB9 zvX8X-PU^3cR^Lh~LRejfGErJ>k@}9(YNM1+;5)2k;Ze|8q}58PZz-)llF|WT6#`v9 zY4uO3KLtJJ>OZ9Pov``^Wq)b)q|_gjRz*@O7gj%_Op;a)Nqv>HdO%7g!fHFpWN9^3 z>bFU&iBkGdSZzc(Kw1r#`nA$3T}rPBt4~p;NGprfe=M!~Na3Z~Y1K|j4+^U%P!5zkS5%#~ISD+jst%gW_ zCuucUN~hyESKC21R9fj$-$q(lq*NoUnn5>AS|v!mNm_N2(r#f@hw=_-)miEfLxUN8 zsg%AHR{K#7msXn8{~)a*q_j?0RiYdrt!m2!LSIX(pQN-*SZzj`A+2^u{d#G&MM`f7 zt74RPX|+=7S4gWBQhGsHy^nIFw0ckK-;!2uO6gHy^(x9crPT{k|95Hiyp-~U)n8GL zl2%TsUnH#zDa{a83sK%Bt>#Mo!_q28O5=sq11LvJtGlH>TUw2h(r{rl1!bnRvPpfG zv>G5KU097qIYwIDB=s56>N+Xi0%=|N-=cBZV;5IlgAG?+UngYVl>Ixd$Bgyrnri`+ zPFqAoktMHhE^Z`F{xw7E?~-2WMvZ$FbOpt8+VQ0Y&2a6$C1IPcz2efIC^5?IYlaNx zw_c6)?q{S~+sWq9_*zZHk1lsJy~T|s;#*J=qetQyW1?Ko#V_0Ag0HM39p4SY0_-s$ z`n==3KAU}+ttfHyzORYeNw|R0@ofUfo-Wx}>9ybDd(+W6z8_$m zz;_m`#u2OeTR86!&Wm*O_qzEz6z-+&0Q;mh5VAsqvsXWWBs*$Ys;CJ`hCCo$g7_Faf0vr{*%`6 zQ9cgJ?PHhru}k~drG2VP`&5_qsV?o4T`F2FI#Kko zw=bbct7sJZqP z6?(t^!<^P#z5Y^8cHVv}*%)}r6Dh3y>}^hbp}Dsok5ZZMkDjUB$ZFZ!$SPHxB9g#LSO1?SgaAO2Sgi!Y`AO6rMwZZz6wnld%Ven28UQTTZtxqt|m z&SF1c3_Vf!c`k>iZ~HxG|M~1Lf-R8LFW#j8NWA%rReVYP; z-#4&P@v~IX#fmOcbh@Hrg3v^zkN=%CO8J@om#8;yZ$F|{9^Slau$SkjDhMrB`lY{< z^7#}$dH+lFZq?sEDxISG|3{^>RR8~|G*9*ak4hJ*{{K$o*a^yQ0)2WRk@T*JUKXQoaTF5J&TnRMxiuX zq-4K$fa04$IDxYoU+cIpi}z>7y8Gj<5MM7DwJwxjcB<#vHDnjLEsy$GTk_)lusXh?6KyLRkGHMz-H=5Vvu#`rUwj73h{>UMr5gKE zjlGG-O3m3bQ{#TN8GEo?e(q&S#!0-=2(NI(t4nRhL44K8-IY%P%zN+%BYv3)%_qgx z>Ep`rHn}jogVo&<#%%ZpmlWqlw!Hy{Rx}8Pe2rZ!Gjk5Z z>pi>SUi##)ySSh+0hJh9zMa*Q_h6){ul7{&ax`hOYDt@|<}Zr6?7vMK`~FPMmfJzU>vsMdjPtzKBsw7Vaue2E!NlXI&}&^>X_s>qY}08wuWl)1?Xg#|?CeYzSnX%GjSW`E 
zuI31~$r~)G@DkihA7V~&4h&6Ncrl)73^pejR=iQw-CTFL@K@5j`gxdK0E-Y*w?%CT zYMY@JenA&xWiG6zmjmzNLD*rCK)ZD4ucwhC1D;;XkX*1VHkwM z0DmQ+o-1I$cG)H>LQsLfk`PY|)!xF3t@Z1%+3EEqo}+w#;(HpBwa5dRpEybC(-fVq z=wph$rf8X>Rf_(?)sFaVA24Xf1C2ZPWfM={F^KP50(F8%72FP zAEEq{l>gPr{{rP-ujpIKe}VF!to+lIe;?)FMftZ>ezl6es{C`6|J}+zMfu;T{5vTB z5asu?qAw`_*~)*U^1n^_cT@hF@;{;IKIQ+E@}H*shbaG>m4A%#Z?60fEBd|ie?<9D zRQ>~%|8>g0lk%@ubieZdM)~I}|4ilIPx)W1{M#wNT19s&|1HY@LFGR}`QM`ayD0zG z%I{}IcPjrf<)5wmt;)Zf@^7pBPb<1h`G2nbmn;7)<$tU4zfAduDnIXeS%vanrTpJi z{u#=@yYi1x{znzvt>Tm_fA4*Y7nT1&<=;j5hbX^&ihim5KT-ZKtM(sL{(Y2xTjl?Y zqC1rTI_1Aa`9G!n=PCal%0E>3{ix_B<-bh%|4sSNSN=1Vf3)&Hs_1s*|C#cCL-`je z|2fKkg7RO310)WMrtAxn3NJ`n*cJy59+k{EfS6H*RR*T)cHW~4k1X;CQxbwoo=$1` zui&cyJnD&qk2qw(EX?x=)OfyTa~7uKP+>C+L7dNL2t1T-$Rd6RO>$Iar$ci z8Twl2Ym*8G)+a5r*0YN|V?=U-I17hMNf2k@XekNeEX=`I1A@W zNf772`AA_tQh5~gQP97F{uT7^pnnH_4fHk8cR=3({TTFP&{d$TK-Yn;1KkX|8T4z= zuR-za2G;LDe+2yz^dRU#P<&b6dJL0!;AxyISx>W*sWKr!oP~EtNf2k@T~ZRnSvWyT zf;bEBlae6L!W=0H;w*efN`g2C7NF4rG+qQ+1o|}S)1WVcz6knH(0_uy4f;0dN1z{p zeg^s(XenqZ=og@0fPMw~73jY~{|&kqbT8;npmJ@lq1RSvW;Xf;bC#$plM5oP`fcNf772d^DbqBpv~M z1oSD;r$Ao-eF5}U&{sj<0(}egL(mUFKL!01bS>yw(9c0X2mKQCOVCQtO3)ube*irI zdI0nn&|lD3EKaeW;&|RJ(-Fj3I8;i4I1BHTk|55)aZ(b*S$MCM1aTJ5l#(FM!oNsK z5a&P#nslIPC#VzjNzf-j{|5Ru&{sfT0eutnP0$ZOKLA|`x)K!c{k9_0ftx@#f#S`( z)(X&XLGfE01NVUL0j&nD20a3L1YJDI;Y2r2dPj4hOiK_a>cprB;w#dKg_jf$2NsH2Mn07%a9^t4vG~ zXW?Ke3F0iYOGyxC;aDjN;w+pbB|)5pf0mLU&O%;~0VF}31LvXfc}T(lH9#K+eH`>T z(C0w^0s0TnC7??{-vfOQbUEm9P#35RbOY!H(5;|bL3e=e0L7bPt-C-ypdQdepoh@e z+=)!G z76BL7`eNdJ)Qku8WIX)J4}ZL8=1yuvENc8>Ib6|f4)I$3M!ZmOyhv|6YD7G00>)QEW01dR8$--w5eRK^SO#-m2Wqb5+i>Oo@v8+dUmKldX-za#; zJ5k}zrnNOK*gU9eSX+Zu*}AsI+EFx%D%m!wWW%TutvjS4=6fa(JV#zO%;BINMh0z{c6*OECx4tF=xqAj>l5k z71K`6DB8)1L_6*DR`6>Z8um8IKC##7xpyL|X5y_u`|vY&Wl?ws zKHjaWzab^SF{B_6v zD9n8PX3RR!T#fnb^G@U;!6Oze*ED+&ph%o?jTU>A7CR_Piya=V#g2~G;;z=>(xSAu z5z$&)W;~)KA_D%(5+zdT)>u6Mv5sr^q?PA8I6G@4o@W4${m89oNS@|cjaJZjqsU02 zap-ybnT!^jm8iu|P1j;)jnQJ~V!QlUsN*IkYH`!jwYb@1w76WTS5rN639>F$8}ozL 
zvkj&QI*7fz8h>f*c2+c1NPivbawkH{O@rjui5IY2Zq$i}eVy3e@Wqy5?5;racs#{K zP>tX#I^agfqsT(=X;aft%o>Bjm%J{!hDLEv6$#->I^Z(llsgelxzpg3I~z{94P7g0 z+BH0!Y3Q0a(zF4H)X=pQ##LQ&oQR%r87k=~jH1wg)=OV>KbrH5Q?B!i&o}8le4-e* zMt2+plbZ(S=f*R}CT2`GEp|W+rc4#4OgW}Zu@;w7gE>=$Ia7`~Q_QRp1b;s#qNN*} zwj{3~u>DMOd*{o$7+N&mI4+WC64*CmmSV;%#*A5n8Iz|M&C1i_CN9NvS&Zqj2-5|+ z+`O4@`vZETi0?(TN(5&&!A%=b&YmOjgNf8b%1whL($9Xb2~g_$TKwYEY3yDwKYrzI z6Uj3O!wqM+(ZwjTmZF%t7{#nbC}ge;vm*sAB7@u!aLLVtOYTIta(Yyu!_WX_6lsut#>745 zjq*EvRy^AP&&!Zs59IeJAD$sTJaMiBJRvJQdG7Rk@GSW2x!@TjUxnvEh39w3cQf)` z<-=2CSs}_wQSJ>nFFf}>-6Xppc>a1;JR^Z;XXMu%`StbTiJyg1^Al(Dz!S2YZ{+(0^8L(*r^vEGl$D~~8+u-NKJ(PM;d$z;c(w+fosb`n z4paO1@C@_eiK{K(30dLE+W@}@&%IAJ$v7aMLGo329#D8zBHz!E@25UIMV1w!tQ6(m zu=Bz*rD=9S=I6-ab5=`|!jmC-8)<@Pz!k@I3lhlZ*r686;nYr$^zr1Nm-1zRP`h ziYzNcSt-iB&Cd(Z5lypeG(WHMoiEj$iD#p;r3j$e5!uBeyIwvtBYbE^2s9xpG$H$= zd0~^YCElNxr%O0(GJoxV)`XLimvKqoXrnsw{4PMgt#E#)@N_FYwAZeL|ga}#cfZ_Psj>U-Uj_W z+xs0wXMid1H#9oG3zDzGbDzTV8{}Jtd_VEwDYC2(Wu++h;#BiI&ObkSq)B!`@H}=_ zJevW}i;y2K>8IZ0!xKNOtByyw?F&31D?E7{`1jx$-ZbNY+k23F6`nsTJikW1>yYoq zK0HO16{4&Z<=&R(h37=)x#4;Ataye4&-Tc#EAs2d_VHxDYC2(Wu++hwmL67J2cHM$o%}}taye2&vwW!2Kn() zUtVvv_Th=!{=gHm!jq2yeh;1#jB~*=NWKcs9~7QnBHy*h_d_3^BFhR+{01 zY+;k^g5Y`Ntaye3&kK>?mB{Y~AD)puJn`5ActTcq@-fKo!LxnSj04WkAo(ghv5KAd z97b0l-!;hh10S9u%L-9eigIt{dEq&G!MWji_^fz_AioQc-xbJ@w=L!TY~#bTjldJK z!V~iE!m|{2pqm`r100Wnzz#Alc^OJRSD_2o1-2N`Fkyt25$z(WI>xJJEX;k4UqQn4am+ zI-ythlZn?S9IrC;9LpgGVJYNrEQ1`4C6EJZox@LY$Y7A0Rt5;LW-{%2G>f&8nOG;8 zQfrJj)hI)O|7ym=63d2Ii0zGq%G@-p!EA_wxFU{k{IE)MK%-Rr+_8!?rBN0Q-lG5P zPq3x~wt5D>XEu$so`G6jIHG>dqNcqRwvGEnJKys?FS_iKu$5S{7b@Q9pH+8>vA1_V ze^uwzFG7q)@!9{6i9gnX4#3*aL0I=W9BV#D<4aD1j;Zm4L2eori>6?KXc`uWj=;ju zOng5p?O4F^T*Y`;PZ~5H406-3==9v;{Z?whv+|p-NF`vmkil>FMsA;hPzjbh$G0hh z0bB8&KQuwRDYl5`8;M4kd;QfYdBeONCRgJU9(YgGXRlaOMHD@k_w* z{*>{snmAxAROY5(x$&8CeEa9$N+oC(e&ZXg>42@Cxql9XLa@Gbe48Q`sBgagYZG*v zV#)FKBHsvOzwza$c(GpuUW|da9?%c?fcRs{^8hS!9)u;%!?C=1bT!8F2#?OdKLr-K 
zX;>JYg2m8jSO`4=i=Z>B5foqQZFEfMnw9}884pXR1II#bZW2Z*E2w|+B~;92XT0xQEa_oEGb zL;TF~U%_}-PaZfHYID=D;ygecHU7VuO7JXv;~%u?z^(dk(L7%ch{8Ga-8=qgAQ+^t z-tljQ_HSV89e>}*G#Z13@fV{OU~JU*pAh4J!f*WF<9K0d`~WP6AB3gw!?6s0G=4N_ z&>`>m!y-2g3+Pj@Xg&=K=0{+$e5MC&911l4%NY+#>;uO_ZEhMC;RlGL#{V}{37&;- z{DU?fxYefP9}tCe=(~6P&pbk!dsr4dX9HEx_2Q@jouc|9Hdr z^X^NOSgL^~s=QSYg@tAORv(738HX|o#qekpqvKIzC8C&`j$+mr6lbpX+J{F-`?&lJ z?=MEd)n5C!+G`(Id+nP&21UbaufO@P=(!&8nzkr!Wt6!2M ziM{JN=bgYaP1DRotk;}%i3A0RteV}#54y=YO>~Xszi+*#m5W5Z>phRB#<^^98*O~Q z@*%mz6OZ`ua_si6y0AFVaaBjL(DSkTp_8lAxZbm2HK}j8=OQfH{4-qit-H8{YQaMn zgBOeO1UR;$lp3pXoGsWAw%Fn}qfuXraN}yy$-x>C^V@1{#rIGmtmdt@TO&1*oYBFT zRTjo(8t&NTVqMnoC@fxz!u_*oEMAJnJ*jv+K8(kzE-bG~ ztL~}gnZD(iaw*ol5^mEz^b=<_K^pT>`J5=DUp&i2Fv5CQA7Lqq0Yw3)3A^*4GRe~ z@l%@vrlA;=t9qH0jGKt-&G_x+wCPz%;LFITU_-t|JB^iKx#{GNf}5R4?f|%H@#OY^ zBPe*^xh!2d4de;@IG zs1g35e)yAD_+zD+!XKQ%ADqG;oWdWR!XMl@;2#=b3gP-2X)*Dih}=d*68Qg{QUxS| z{}+@h*&gx#j1t$w3H;xtbP$rj|3yl*kR<*^5`V}Ne||w);xGDp9{4w#c6R(vp&N~s z%K?8I@E=9|rx5=K8{r@3hd+6Rf0z${a0-8L3V(14e{c$aaOZ%(Bfu1r_%rbXL~bJ@ z3H&Q5RX`H>e@>~A?GgV^DRKRr!2c~u2O$akU!YVAN#g&A#2>Q6pWoS*_>2CY2mbT# zJ3Ia-(TzsS>wy35!2eF-e=qU>OC$Wllh9kW|0l2T$4WLe|G_Ez!72Q~Dg41H{K1_A z{-FV;ki?&fR};C7h$Qg;mQn>If&V5-m28jrucXBFb^`x5DIJ6)@c$d7T1XOqr^Fw! 
z#GhX%m-vhRo(KNTrkoxB6X-^x<#@njee=|DtpdlED8NO0|$A{tG1jkR|?n?T^G?^!GgQZjhd+6RKUVH3{J|;w!72Q~Dg41H{K1_A{*C}s zNaD}Le#k9PkefFoh)kOnfhq+lWX4|F0-jKoauj| z@h_10LzejSbx9I`(ckmHzuCC6;E&rdgML9j!f^HOTd#-#0PJ&zaRA110J0kmKx@AN zAg=}hO9RyafKvkiP7MG!H2~n$0Dx;40KQSE(KWpH2E@Zn=aBdMmwEp{<&A*9Kj0rq_{S0cnT_C&^n;(gf*&gh75v~7{NNP);1vAe6#U>C;17QPC%_bv z^Ph?DA#xiLN#KuH$pC*y0)H%S0sd@{_m3FDuTVM&N#Oq^rCLZ5e}}{$vc#XS zmy-C4{+fJXH9DQ}}~Z_=8jU zgH!l}I|uw70j7||pNa1#avKpz;J=Mh1tfug38hN5NBrNX#PyK^|CcEpge35Pf>JFc ziT}eAf5;MlzVk}rFZz2P_|M0We4g$8&mnZ9(eg>)Zvp;;iT_yQ|K~>dYkv5XSNLP4 zqQW1X!XKQ%ADqG;oWdX6Ip7}}UiPp-6ga2|igqa~;UJgyS2WANVZFT?{U{HAFh64M^WSA|Ww_trnM zmhW?rU&Fz-aroW@t>Xgu7KH_rl*u_J383&m?Hpd8;Gd!9;LQtPR(!Vyo(H^p#<1Y` zSy_iv2H=$<;^7OQ`dH(R%ZD!*l@IBfWqC6n z@gEaD4D z&NAY@*5f!NmG-7vitkyOINvy|)eOkP6Z4cja(Ehg`jSUZ3<*8Wc)v44a6eD3$*ynvK3A7;3R$vr{t z334x!dzl>9rlh=2u7q3(xozaOk=uPmz0y+^ghXCC9~hDIbzsOKvT>FUfsL?gtbypC6zLn9na*Fo5Cug)0U)_vZ7u z0LBYQ8KZ#F3dlW8?rC!WB==8pT>P2x5xG)wrR2UM_Z7LlC}jJ4p$pjlQQ7`cU;Df+ zgz*AW#+c7&^U3{{++WGPM(#CoTwI*;F}Zc*){*;~+}GrOL?PS%5xRiwACv7L^R>_W z0x(`c$`}h6Z2`Gw$UQ^uU*!Hpj*Hb(J|R~|u8bUZxVb6cklTksw!aU$fbG}H_G^9Z z^S%&_7mzZ>LPlFi?pborl6#%p>*V3G$UR4H3ArWYxZWpaIk^qwHjvvvZU;FJ3faB~x-;8%4sX0(1?Ka4 zUl_&5g7s$Ou?k#d$K%4R@xzEXcPOg$%CAkAAWcvr83)Ft&^`bDJ&*y?L zUO>tiMT}NN?nQDhl6#xn+vK=dJLNNSUy%EP+`q~Fo7_(*WcxoscV_!%TaOI)`Fu_c z=LMvU`6#15O78FE{!Z>4a_^AimjhB(k=smeGr8}`eMjyf3YpJA=+4YX`=s&ucbL!5 zRqc~9#$$~37`d0oy+rO^a_^E`O>Q;0E#$V4`<~qQPjmLFE2K?oZ@;kn2J2GIE!ZYlA|x$HKq8X0koJ!A`aJ5Pcq^&wb?XBX<|MyU3-I zOC{HrTwikC$#o~!nOtXbktjrakx==!7cSci_qF#ReIBIG6mnC@jUqRSoQ<4~Tpx0M z$aN#vja(;koyfIDA=+yVm4AC-vOWA_k?P-H=<^r)+)M6Wa(9xulicm(ZYOsuxm(Fy zOYT~7my)}bTq_i!y;e~9w-+kg!&RDU?*aNeK%dFvCX*XUZX~&ZrF0>TpYQJ$z4pY z1q#t#3#k0tGt2hyOGc`HbLcaNKKGEjhg=4^401X-om?++y~xFqizU~QTt{*dC`5Y^ zQ2DiYk#2No_&ydbXVYgkxryW^k{dy81i2J)DdcV@cQd)G$z4sZ1Gx_5nxhcyHHXUA zo^yDpvyCw#6n*nuU(BM1IAa@73JIE!IOD5NoTu*Xc$#o^yo?Lr!;V49V;ZXUtXJ&h5 z|Ms%!lTDxTrbvfxf{scK<-L%SCYGs+=b*qQHb_Jq4Kqdc+z@j8$n_(4J-O@2T|w>&au<-hfLsU)(Ow8tzV@_FF3M^;KmSah 
zKhtL{xv}I1lN(IVLe4@ifm{N)%gJ3%t}VH?+Jkq!h41@^$>a=s_D=ZSp&NTGBP_#*>hlgPgTZ*+ z8P-+2F~vx(Dl6a*nLukwAN)dw;p%BJMVr!Jd?BOv`s`M+(u~T%FWNI4p9xbfzXC5l z*NZ;t14rHXC=o=94ZkW4_jKY!w1i@#fIppLG)o^kcnF&`X)pJFk={91_I{eb(2#R! z?;`$YO3sC4!jx_3&Zim62pMU=Gs7t(gm+;-ni=7jf;#HLv`0)Z)r&+sx=}y}8G)^^ zxe+#}(DFs2Cgb?BFc9tKC0#(WJ6jBs#mES=7ZGdt=d9%^QF6h~E87@sle zOt!9%6V2mS__RmyS_hNvstUvHwW9Wm7BkNeBiR?D*0CkTv9F7xJ~U^vZq(|opQGpP ztJh4X`Z9}AW7+DqII5=iGad{Hxv(&o=7`BiI-F5J$`gQ$`?2y(; z&Zi=Yho!#6qP=@4NqhGj{C#b4?QfZEw8Py%SCrm+eNMi!fT%nAc0e9@slFAxbJT@s zk0gS@kT5MZB*a2Nu?#oZfI*Ya_c6~+?GrNh`aYp^JNPUTC>Q&hB^l7 zSg5--sH3262X#lNI}y0TW<{;>y?B$UPq_B+hGn={Y0~<+@{6_nzoX;Y$2AG{wxYI&OV3m3xSTR=34$c z(4ylxIhMTB)|_c&1*722Nn|sg<|wM(lXZ%^GrBJ(S&^7<8|UpdX(ml{3pZFXfH|Fw z^)7eot~H2=SNJ=2X)Rpb*qY%-1|91|l8oanPbdZxrkEKP!xkDQbedyVE~N;I>*tG- zjR!+p_jlS3h4nL9cNBf%FFg4Al#OBeKhP)sM2D|Wi!lcO2l~Xh=ke~q^sgM2TE8>B3J1xjv24M&JuldE#t;HNLS6Tj0k+LC$K z8uugdcn0D-yk{-q_bqsADTfVz(sUSpOw+K%^s?cnNAW6iTa2DxoPDzvmlTt(#o1!U zXmP`0rfYF8#N@$2i+eSOcc!(tw_+9x`eDpcK|hTt26f}Hm8sy{xmLZ%787kXc3BN8 zzKrw8|JUC4z{OQv`ClM^NK6REsPX^PMolz9ASA{}GBdysP#_ctK{xssW(JsM{y8%w zL|cm{YT{q&pEg=;siv+~Yq4gnw$!0kW7>7s)UDQ!wHB>)qxBEAHjUN7e$Rd948sef zyT9H2{dRvJ-X}Nb-E;0a_ug~QIrqN%-dq+80gwCKs>kzjSEzG1wCjXA^x+8w*Ls}( z!yokgA@!(w$Kd|NSri;!LT~l~Lw_=M8z90l=0;Z&^#CW%r1gErwgo*vTs4|Fd&kE; zt3Jeu+A6W4%KKBA4XSRKU4i0N*Hz@^_N;mZw5&N-O+{5z*RA>&4!$cCx+fKavd;OLyI&~`Abnu8Mi{nT9i8Q3k=JKp?Z z4+xLbe>IqAcI?^T4QAJ#$}dy?i2~sFFDG#*e%>C6t;?RpbX-k;(-nUsYlKzs)czup zdBduEK*S%CJwmr&>H<{n+B0QAL0aMc)2ZZjt8#VIFu`w|0+-k`^-p(EA5toC;S?OL zf+tAHJ6^8Z@ss@*K`IPrmbJt<_xc4rAa%`w+DX0i_fug;Kiyx6V)zL_1-+R%@vrNw z;kZAR zP=QsTchi3Xp9V31yepVkrN*wUdw&)xG^KvQ4{`eisve;z)q1RFcP~U(u;iKMZ~yLu zXI3BEmxX_ST|HKAqPt;7Q7CkD!Q=7d;@yh|?mhZ(s%L2a>8l5<(!-9A1M{+`efO5+ zVfO|~A3M00=!3_E+EVljPr2)sw-38FIqjP-6imDId7}5-@O)Q_{@iUFum08-A4z(@ z^U%Gof9WMH|J}tMDf*VVe~ovpd?I=7foUr)I`dVc8#`uSTq^Y^wmkiJ9XCIn+}ruk zIp;p}nx_Be)az38zuo;nVR~nZUi|(iw{N-f=Z=3m_si|y z=zCMsOMY`5*FWTF*}UM^XP5rO;X3-rjXeYVHT|`xuT0Uex&F;N9*IBbxaV7Wi`u{b 
zHqnO)51hmDha7cx_I`Qsd3Plr{&CL--(T?#(R&^q+LYq&y3pzP z{v`L`k)i)d)}JGT{*tW!WCr~wdHj+Y#xKd^pUg1+NwyzH2Kz~}{W&t&Uy|+Dk->hG zZ2yi7_Mc?`ab)nHB>SHuga0Mje;pb8H#s)a6WtBl`KdhlKik)^y?&yT|EK7-_7k1_ zKSj6oU(1L8r|35Q5uN;h01(%(UH_U6|4-3vw+vB&EpEiCD zTm0AbwDEV?<_}Fzvmb|T{?YU_`*YalFHKLgUx#h}Bl=L9{X1;)C(+aW$3FiOJ>CE8 z^S73t=D&`3G^gO&W4z~rPhW@Y>k7v@eSO_N9=0zlNB@VnesuSuBP;5DvHtW=27goX=i6^<`?agLyz~L@=SMtG|LE0~ z;RQb!dcL;o?SkrQ$KU*m8OQ(Ogpt*Mzv$sVFS~O8wu&DWoEE+FE!XAd?sH!J$~hmN z)426IbK9NgF8t`nqhC9C(wv=BU%l?|v)(@`H1opeE?*w{!Hj=Az1>l{cFEi3H=g<7 z>0Ot7@cTdf<(%oO-oClv+K0`LH|Ab+!%3BeIn57jyzG;c2cDyC>-5X{@yTyFZr@os ze@TT8>mz5=$0zG`50&W|zy4am{gri>z{&NOrO;u!|2Tj*Cf8q+>o3bbh9=ivmLf9z zKe_(0t>0ZpK?vEuO`=5*c`wcW zz<<(r9KClm&;Ie;MFU6Pzo1}XeQ-eDTgm&6?JtI(r}vl79KQW*dJlH=w{yR|?(g4s ztn}O%&foi_W9aZZ?o)-IFFk4f5kJ4L@0tO5uO;tSuDoINaeBY`FDETOgWj|Ks_^EI zFQ56o<2du=%RV~hImc6XpYXwZOMC-6syDuJ>Ef3c=lIP@4|Lse(+dvc!1XT-{_Vtp)BSUU&&=x?koRKpeklLm7ygCbk3MlW4=;W99mn8?*Jk8j^lQgf*Vq2B?Gb^b;aM@bKQWvS4+L$YX9wEq`c`ilZQ#}9n(dtW%^xBu|3j=p!UFA03<_m1y>eCF-O`fCP$HS@O*p8e4)??r3db}Vm*l7M3163(WB^H z#zFL6#^WI4CgUjMCgUjME_NW}E_NVxBX%TqBX%TqCw3@yCw3@yD|Re)D|Re)FMc3) zFMc3?BYq@)BYq@)Cw?e?cMbcY_^tS{_^tS{__+lSjRVY|%)npD^{3u{OZ^-NO1b^X z4DBD_{-@rr+xlP1`b%ceUn%Q9_5RzY|56^mq<#EOvEo1}kALd>0NePNvi&48*v|ml zU+Vh2E?>7G%VE;{K@ZVDQ`_%h+ z@q31)%$JlnAb!trpp^YS_5FwVJ-0*RS5o4D_&vvgQuh0??@PwVKZ&17i38&I90y9- z?^EBeh~M)#Nc>Jp91y?fI8e%dpZY#V{GRP#fbAwJap0u!^M}kosqb&Z@7WFq*lv>& z2gL6=4#@nM`o8Ci@$;X|pGk=WuZ^ESOWE&J-w%o3vmchS-zFsv+%$gv=J$w)zL%D` zVB=4I#o{mJIAX6qDSj{Yv)@bmlidCRj$@@9_oV+xj(epX2kiPwK5fnarJ_HMyQ2Rj z>%Ua=ZyUel=h*K#j@!pS$>U!tkYxDo4_`N_|{74F)$g#zrq|__z zu*I*Wv|IXNi+@R8_Y8<0Z1FQGdJ{d`;%`#)F5_T}-$@xa8An_EPs+H99oXiNq}UC| z0sH)u6uVn&twZheS5oYj<3Oox{!5D8a~!bGpGol>p8p4I^KVl8j^jY7ZT?P*-`>K0 zEPl@O{~`N15;r6c2>)#yM{MhlRQ>WCAaP6LfUW(~fBC*G)qh$4NZgb-D*EF%FkoAM zrSvaxSmL(C0U5sq9LH_zKl}I($h;wOz-B+Cw)Lmo{z_%uk~mtC5S zB@Wp9XTY}pw)?|B^U&wuv#Z-0JFo;ZH~7?5?ptV6}`WgRN(R$0f2-?JUd z{3qWRCdKb%9W3i+Sx1ZC%X5IN`y~#D-^)6j{Z^jelQ)l_zXz)8n%p&Jb)(Cyz<*^m 
zZnef;=WeJjH>+Jsi&e{tvsHQR+GZ6Bb*tLi7FF5O=xTPWc-*fV8#OGZn_^(%{-wBW zbT1`pNkI|eOUxpjv+j#kD6*-|uOP6@t)h{rH=I!QYwOGFnpD&q40Q#SXH&OwHMgkt zqUByyS+u-d#eA`_Pc>9D)K#e9rNmKlAu)6Y{i+J>jU_^=p{bTQi;1b(c%{78) z3k3vMR*Tl^UY4L;(KzGH5;U#8QAnG@;eZG>OPcHIs#DS}-KSJyHd~_r)mUQN0trvB zL&TO7s6bmnRhKnEXYs^FXsxm7d=>BLR*h>~)aJHu zBoN-@Q|rs>)>hXwSHOTG;r2j>sxMz#S6Q8EZL~NLPWWSS{3sw*WqmQ3eM6(Wyoo#2 z9xYlfg?u7!=jbbWzXfQ+}Ix}lo$%S+lAv%I8CZT18b(TyEy zy{o#Z{(Nae8;n_mm%*l$KYWQdl`!mz-atI`QKMe?nC?xEtL~stfEfwGO-uDjM^* z`xD;IfU0kBSGt?ZtF$#yBdkiATXaKAU6VHy^?JKKJ|6->eR+9{t3o$f9Ks}bAT5Ud z7=TUmGYj~gS?4w}4$ba{CKdBwq!OyZwUz|xmRssZ%eAhlciu=S8uY{efGl5ITVLaD zAzy)wk-HFxhY((B5nMtgfF)EVx7gBds>#T9;pZNqIQrY@ZZK;bwQCj?78EThUZzLF zEGk4`f&`5u>C|RInw0TGyWzz3EtLvFf`NEKK}Jn=W0TrUV#9H@z5$rJMip%f`!=?z z`m(x;^UKs((OATLR@@(KKg%E96o^H_YX28UNBeQGVQ90lgJ;C+G&~`Jm^6o)3B-=&hhTK{r5GpbuSzbPduD z)Cqba==q=zUokqmZ!6jZdOzs>pgTcVpbvc+b%EXodKC01=uyxW=z};YvJdng&|5(- z1icXSLeK|sh-MGyt)M$WH$Wdo8-~%wVYIOabSLQfpbw)BLug|^=>4GggYE=cI5egg-jKJ*Z9 z{{R`g!GkpP2b&Kk7gl{vKo?qg2&+6mr;NRNTH3h5>AUXJu0_%27vL;41I3y_`wZynOB;0q&- zfbT}66OkSQ?`ouf25$oC4e%w9rXbx4-UUdz!CQg!OYr)TJ_O%3q`JU5j)q(*59FiS%>uUWoKI_%217iF5~e&p`Sicw3S7fwvWD7<@fQ`AB`> zEkqgwZ#~iw_@YP(sTaHkQa^a>ccJ`)z&!vOQqTQ>NIl;HMC!Q@5UJ3Q%r zAUzM>2Bhb~+ko^ucpH$O2X6z?li)2zx*NR3NH2l61!*sMtC04Bw+d-5c&m{1g0~83 zFLUu7NV~yXg0vgFB}lu$TY|J3yd_Axz&j6V8+hj-{Sdq>k?sfY8A$ho_Y9=_ z!FvYM{op+V>3;B@fpiCWXChq+-kC`K;4MVz1FwP92VMiI54;9aA9xLIH8;QZIP(k$SPGtg^3E*$QXhZP6?g;Ag1gRWB(gQ*CKZoR@pMcrQ z&~kDVP6g6>vhXcBV<7uKe1ziXn6KEm80qK2eyN39%qaH3gn2Npf&!MA5fn7Y5Xnw z(RghS(svFA93U}GM zHR{y(OcjWE%}~6f!0W*VovUt*DvyNY2~Rj-;GsC`>F^{1k+9*9#Ue3d;i+*W-qjXQ z1QK0@UbIqGu5GAw)u;w{)4GN_wWelm8FDkKmlhkR2D^=uR}oM%8ZCs+Lip5-N-EH< zv+XFpyoAcF9AlwdV$%d4a>7ctU?;q=mw9{!wuvHPpW%shbcOubw5ar8AIxx5t5+K3 zp0E*)B#b~fkO+7Zek0N8H>&)es9G2Ibof^q=vE+%b12Mq?B4L16#71`K-}L!lFfMF zQh$2szgv=sdA$CMNN?ChiIcY4W08=N@OLK+EfC@p5u*(UUc|OgUDlpS0$Y{igsw6Q zyHD*lpfYUPc*4|TeF_G(ID}0tFhfjd1iD~8W1()1#kfdSQy+Adx=Y?jEY=mJYGOEM 
z1B*d3>iig)j`kVZ_@_Y?c9&!c}YmSHMfwpgZHm*&L8*^-;Pv)}wKX zZ|V%hiLoQ*2^pI`aXkucU4dYNdJ^jj8&DPv2FiH-@i?t@2L4ccW412?V={Mtqw*v4E!yBT5}M(3CcRAlzYK zlgVdn4kS9E6-HT>5ya49ch52!qpYjF-5)a|7*yR-ueU1}^ZOPX?U9%l68zl(Y;$Up z0;A<7k#;mZ)D{UEou05S2=;a~$nPt#Oc5jR^V9IwHC5Eq7@>%-3$3GpZjW@)W_co5 z?ujPQ0T`2IKV4z3cED62(oi_z3kD02l(9Y#_C+?wjh1?2dC3xN{~B%|tQ>l6W@9Wa zEL>_pvtj>cv?q>U!+_fSS`7uNG2!uUT<-~PB>iB&H3TcA;q_pjHWq^Bx7ge6?)Lhl zx(0B>JOR?CuPa8HMclv`Y=$)CRAZonj(8#Gl7bWA3{Qd#KdKt5*EG7TS~#UgbmDnGH$4CTHq?Gr8~)E)4r23|YRh;rfHtKsiAVk3 zKszGQIIgi~q(uikJdLn~4xw|s|tQr9+(2lvF6OPjsf!$LCjYR!1SSvZ9j-0yv<3lU=+)^yr8TQT#)PIA} z#Rk0q)Gfg4KUZDFIBop5R9U>(z#FZ_hJMAk*ueY6#Rgt3>Nem#hpho+6m4lTo_O}4 z2tR)EP@ln(KKV~9^S@lr6jsyhx~fD6 zE?}+9NG-JT!FxkAE9R#tOY>RWC@e78*!|%Stoy8mMyDs{+l=J|sqZZGX>Fr~2%1F@ zknlPVGb7|9P#|()O_Ft7&bd4B>|BiH{D;qvj6RF|r+zsy`XS&i0cYVk=R?5K!<5?f ztC7*&z~B1P$Y=%dx4t|wdMn@?uZ@hpL*)Rc9VR z7y2js@ksk$7#Tf{%2Z6(Pm>F9TY_Z#Vq|n5%Bj5k%DnmK%{gpyZjW*td+r%a7n}mD z=HCFg7vl{zq%M-1KpF(zAgn7d|H8anT^D7uF0HT8Et#G(c(_yLNPB8Pqq?hJ92xy1 z>YY+k?IPZ?y!mB$Mp@p%vb;i9-inI6)~(a8pLWC48>ifqb9Hvkby;~UTzQ3>j#4O_ z@5;+{<+V0J7PaFb?5%7%AvK*W4wgh1zaI;Xr{gv;{V5hTMXR$gsCJE^IZ^^ zb7OXPly0FIbkop!eFJ5OQ8qLUWl`_e?3|8i&^cW+##7)|&NoL!<74ztI(-4?eeaBn zy2j||3Z42(e!B0EBctu8M=ru|0Dj?zBco@h;THo>e`BNm&m*HI6-HCNdAVp+ZQB29 z^gTvO>tq+o_n^Gk5$S~fF31;cxMzXuw&8|=OW1H90vEO6auE}Pz#TtZ{nL4h8HmOF-+L#_Ht6*4y6Lq#=qW{7kU3t*qWhz(vD2^2;B~)e6|pB zuFcBp)>?<|F#;`mG%zL@nH74J5N8Z4j%Ww#pYG;1i3owxH_3k%w3iRETxp~9{qJme z59c@7@LtaQ8OqoSUgS!#BgS1%46X>+-bM8@O!+{$k*3$>)K;GTVv@ zIp@yK%YD*{8{1Iknpya60!2vXmDwA02(Mt-yFkASbUhD)-U>X$rvcy}0AAOLIBLxS z+j6eRuFqSDNY$XFP!zT5c?ae1L;0tVw(7w=kUUybt-j4d!TJTGqep;j%_&y=T>$*~ zz~j%5>5Jm;3gCjk$tx?;K@D(G;XhwbYdU`WfZquGkrunwS2y#MELCNd?Zj;_@II7% ziRKI|hNjQ&Yx0J3)92&tNZ!F*7m(TC<(rEcCWKHq(S)tr@CoYLcj{Ols43Ksd021# z68K%DC%y>382I3-(b1o3gSMq4i<^&qhYSgL#YO=(1~Q9?IE} z%_R+xo<+G_ycd5A_f&38-cS}6824M+#h?EIdKhJPq0FVE-_#uB%3HW5uRm++)a&)+ zmSbjV14<8cC|?hjLRwqm-Ffxc_`uRIELqf^tFyB|$TMJv);h}#^hIfT3{=*-v^?a| zg+CH@)Vz%D 
zTF$&5%vOKSnfcw>>R+e6o;5UE_2tfcb+&pg_pY2*W~*Cf^=2V=&+M6dXRGhdzKz;E zc};3yQUjA3nAE_e1|~HysewriOln|K1Ctt<)WD<$CN=OsMg#pvI<2 z3*TpaI?O-aq6_R@V&&~{XsLy-UuHpti?$0cAGY#zkJo)_X0Z$Z>raF=eFf*SwXgFI z&Qn?9spLFf5$b#`=V{xA=ofJwTXQ-e<~+8hbpDH+$F_~m-^zJx!|42XIgeL)x_v+5 z{4CBt$9dX*q&6Mkd>-eIUTXDc4(H1_kF6(NeiP?uzmxcH;yk_Mp!`#ur#(Z;|AzCl zFGl$fI8S?jl%HB+)kp7HD1Qv+X{(L$Rh*~&Gs^Ge{4tz=jPu8Ge&#Yu-f^5ioAdg; z7I0zCpTP91IDaDNf57>ZIDe4y2ItRMZplBH^NpN8h4USp{~YpJNLx9-fb-XJ{#4H2 z%z4_&rSi9P{&ddwah~=FiGCmF7jgb!&eK*S>E~U}pUL^|vt78e-d_>@6V5NeJ>`p6 zSajNxr2ONYKa2ApaK4c9Mdw)Mi#YG+JiW^&{#~43%K48tU&8s|N{gTNj;Z{+oTt5R z%Aa(uML(PK4$iOOyodAWaQ;coujKq*&eIkmO*W2=UaGv~2U-?cA$iL`X{DxCQvRArV16F&;KItO6ri<*Cu5PB2z0yT~ zPZwSjMhJH2I0-0tWYp@6u^KgxM&$1vyHnXU`{ z5B_g3Wa-V`#N})cl?cM;Y#6_#(qM86IFb4If9)bri$X7%pd6 z$*`GWJHt&3uV(mFhW9djl;IwRFEV_Sp{_LTzuZeE)?{jU0I zqoTh2%rnnI!e_0aC_X!m8O4Rg%L)n$N{oeaSAd(6qT<436o5bFqE<~&AB@_IkRN2h z+gfs*nq=tzu`U3dsQeL{euSdWThcF@Ka@H`(Pt{@7sbtNh1s0fM+Orc0khO`>K8Gq zd=}$>3^-A}ZCu{?qE+6>%Gc$vuZ1^?0b_RM3v z!FZ8#4C4!J_!AlLwBbL;_A#s|{by_$cFdY3<>iC*zI3j_>a}#uqYP z^n3y1SJ?1vz)xf+GG0zw`Hd{6%7&M5tY^IFO~$d+R$j(2YAY|}*ki-XIQBB$K90TF zF0C2quZ&|KgjN_mUFXOn+CSS(!B^zGGama?3aU5p6eH{C8toBPgWgPn% zFa45ne9VTY_2Q?EH1-zeNA}HoWNHu;E4j zg^U+Hi2hgD@S=aG4KMnyXS`kiYWDbcik;^(Uh0)`nQz0(xEMD4mokjY_6+!&GvL3O z0skQIG%xfnx8i{Qb`hTNGsYL5ZQ-T;`x$T8@YAv2B{}&v{Ar9=HoTkh!(6Y(_W@7y zT2*|!J#6Oktt^M06Y1&&ej>ZQhs%G1%j<6wQT|EBFKu)ZyOQP4uYsp}d$`^{ZqR#- zk1{?>b16mNSW|zmbvrdie+PxT&jCMCdsb$^*Rq_QAxqADEe+q;WGEj29=i{t7>02d zebY^NX`c{#7%zE&+Zm7N6@3Z*7RL7rg5ll3Pt-5^W}NiU`kGUh)ZgFWZV-5_PhJx3cbovzoo{5D(pKQewht5`l|aV^nuj#l>Pr{x-tr)GX0wMyfgGT`S*CJp*C;J=gs z{}sY#=c%FBomz_iehGJdx;)mmZ2$88?<0)w=Y7WaH5a}YW4!Di>TjY!dzJCBe|RcJRyxwZ@G1UYXrSXBbnh7UH(w}nS%im8xz?C&&+^drSW*q`kqr4e~~AWX5dNxRkm@D zQF-Vy%I%T(d=1MPCNKRK443C+$gFX_G$>8`o5Cy$!z%_=)Uc zgD!uZ>g0F@^U&928Oq zv)WAf;|bFnjTRs&^TNhN%?i^D;8;=|yt6R~JXM=wH8nvKs;u1J>Bpfh3v6yFziQm!UhqvuG zIm2)IxW9Zi7n>a_{MJQ&KrrFICmKt8et5uH@XEGm=qld zfj;2`7#@01%m7sRsl)7K*#$DroCuPkrnT0NGI5x27c4f~Wy&y; 
zHKcVrjTE=cqAlVhb7R+n0Qg{vUD1IFDAyhlfn@JoHW2q%Br?wcPTQapRcur^WF+kM zD@*AZCv+pqy+E^A5&&w!2mr-B#Lx2aOKqPXJn1!JLnERL4h%MNYee>Fif~qVg<4nFu(qbgU4b)@YO9;%;3GsDeINv?p`(zZi5MM+q{tQk&9<&~9F-HJ zcmOOPmIRZgql&fJ(C^?VVcm!n!0tS zyNZ!j6%ER)xS-BeTV0-tLu)H5arm=k`oy7SwVU3~jm|YV!3lnZ7egfoOm&Tsr8xA* z?X~bNEhgb{1cVpA%1@OoE?71;2wIlXZ4aiKyHz42gQ}f96)A*BuD@kPdhSxhGQSUf z2GSbQG+VLkDMhiyQMkn%DXLIK%B)^%#uGkNK}Yl`9G2qkj7{7LZ5 z2P8U8eHd2#`QvqtkROkrB0Ba5RiW`=^2$SoY%D0_)a;aJkh!Jl4TM8<35<_UaEQdr zaY5UEwq*zBn6)T^1GE#>AvX?5k2IN9S1h9_IL9istUcHj?{sfUr;W7(BSe!PybEV7 zz;kIrvZ!8k$rFPcKvgLfErkt=>+2yw^dWz@8jmAh*iC`j6c1;xV#_L0GK)y2B|!ge zg0$$6RvAwE(rW`f?b8^i(I|32D4n8iURYEd_j_PSI9v)dZ^QYGiRGJcHbf*=?Q8T@ z``qrvrQ~5`8kjb5rcBik8B}>A3tbY6&F9+i*cq5e01o zghM+^A%Q$hA`YE{hC#3zZn<`w$A^FsPuTndWlFp_bY%P}un&9ndJ_=~0b$}wwNFq` zfy%TJwGSa+;;=70Y-8az81ZhjqM!_?W` z5DQj~ID5<1vqPI>MQEE}z&j_5V<`=#%fUHdehvif86u5n(TXdn*{Gwt+r;rn@d(Wf zi7n=)!ZZgPQzG@476B=L^F|Qh?Lncssn#5OCV-#N6G0O_? zyW-HnMr*Xq5TBmL@W7)AaIPIKdkTQDApW?Abw`#Ni>d+~QRgq{2zM1=E`~aB&YFcn zq*_AHf11|miFc|3-$1HbIU3x5S}4&d82slS`+?`HlLToA7d^kuKV4p?mUq}ngv z1NJjNz4xOF+ljjG!Y})?&A?L;sb9V?bn=FXd{2lX`XXJYUl${T7h|dR%lCWj*q@`T z#sW&(FZBrA2s(WmD0%rl@G<5eVt#5rwNu*9Xhq*AkxqqQzDL|iO2IX#{~!B)Quw9+ z+dwD&GA<|IH@0A(iLO2tOzo#{I_>;70!KQb_UTLZYk4Dx>@)qMy3(&5$fWZd%x^IN zKAXa2e@Wo2HvVDWZyM%}aA~j9DS3f++4$95r&W|U0~b9noEDUN1$z%}(%UcJzb<~m zDuL~;ahJXS-vO2QW&GrQl^V9#7T1nr$6o6A>tSTm`Q?4%;#HR7?zBLP$zFfb#=m#9 zOPAfd+G5iu!6#eZ;&Ro@Apz`1sQ-fvbOi+{U~-)YOw zM=_Em`jht>+nIkK4Q|@yOuMJs6S)3D8(#P+KBiw*tE@f~p7JT*Nh^4e>u=!?xlaU* rVx{z3>}m#Z()KiVRlmuq={cJNOZ`%o!pubcCvCGzo@QgPW!1j{8tL53 literal 0 HcmV?d00001 diff --git a/test_cpp.cpp b/test_cpp.cpp new file mode 100644 index 000000000000..d9a25434a438 --- /dev/null +++ b/test_cpp.cpp @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include + +template +void interp_cpu(const std::vector& x, + const std::vector& idx, + const std::vector& xp, + const std::vector& fp, + const TValue* left, + const TValue* right, + std::vector& out) +{ + 
std::size_t n = x.size(); + std::size_t xp_size = xp.size(); + + for (std::size_t i = 0; i < n; ++i) + { + TValue left_val = left ? *left : fp[0]; + TValue right_val = right ? *right : fp[xp_size - 1]; + TCoord x_val = x[i]; + std::int64_t insert_idx = idx[i]; + + if (std::isnan(x_val)) { + out[i] = x_val; + } + else if (insert_idx == 0) { + out[i] = left_val; + } + else { + std::int64_t x_idx = insert_idx - 1; + + if (x_val == xp[xp_size - 1]) { + out[i] = right_val; + } + else if (x_idx >= static_cast(xp_size - 1)) { + out[i] = right_val; + } + else if (x_val == xp[x_idx]) { + out[i] = fp[x_idx]; + } + else { + TValue slope = (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; + + if (std::isnan(res)) { + res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; + if (std::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + res = fp[x_idx]; + } + } + out[i] = res; + } + } + + std::cout << "i=" << i << ", x=" << x[i] + << ", idx=" << idx[i] + << ", result=" << out[i] << std::endl; + } +} + +std::vector searchsorted(const std::vector& xp, const std::vector& x) +{ + std::vector result; + for (const auto& val : x) { + auto it = std::upper_bound(xp.begin(), xp.end(), val); + result.push_back(static_cast(std::distance(xp.begin(), it))); + } + return result; +} + +int main() +{ + std::vector x = {0, 1, 2, 4, 6, 8, 9, 10}; + std::vector fx = {1, 3, 5, 7, 9}; + std::vector fy; + for (double val : fx) + fy.push_back(std::sin(val)); + + std::vector idx = searchsorted(fx, x); + std::vector out(x.size()); + + interp_cpu(x, idx, fx, fy, nullptr, nullptr, out); + + std::cout << "\nFinal output:\n"; + for (double val : out) + std::cout << std::setprecision(6) << val << ", "; + std::cout << std::endl; + + return 0; +} + +// 0.841471, 0.841471, 0.491295, -0.408902, -0.150969, 0.534553, 0.412118, 0.412118, From 9e06cc35968823ce9fc5eecfd63f3b146c204ea6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 
2025 04:41:42 -0700 Subject: [PATCH 31/37] Update value_type_of to support const complex type --- dpnp/backend/extensions/common/ext/common.hpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp index 695a42fc5c6a..8bd2826122d5 100644 --- a/dpnp/backend/extensions/common/ext/common.hpp +++ b/dpnp/backend/extensions/common/ext/common.hpp @@ -106,18 +106,27 @@ struct IsNan } }; +template +struct value_type_of_impl; + template -struct value_type_of +struct value_type_of_impl { using type = T; }; template -struct value_type_of> +struct value_type_of_impl { - using type = T; + using type = typename std::remove_cv_t::value_type; }; +template +using value_type_of = value_type_of_impl>; + +template +using value_type_of_t = typename value_type_of::type; + size_t get_max_local_size(const sycl::device &device); size_t get_max_local_size(const sycl::device &device, int cpu_local_size_limit, From ba987ddcb7dc0ec13dcecf70381e55d02ccd9890 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 05:52:43 -0700 Subject: [PATCH 32/37] Add check_same_dtype() to validation_utils.hpp --- .../ext/details/validation_utils_internal.hpp | 45 +++++++++++++++++++ .../common/ext/validation_utils.hpp | 4 ++ .../elementwise_functions/interpolate.cpp | 13 ++---- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp index d5a65d3b9961..680a9df3253f 100644 --- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp @@ -23,6 +23,11 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** +#include +#include + +#include "ext/common.hpp" + #include "ext/validation_utils.hpp" #include "utils/memory_overlap.hpp" @@ -163,6 +168,46 @@ inline void check_size_at_least(const array_ptr &arr, } } +inline void check_same_dtype(const array_ptr &arr1, + const array_ptr &arr2, + const array_names &names) +{ + if (arr1 == nullptr || arr2 == nullptr) { + return; + } + + auto array_types = dpctl::tensor::type_dispatch::usm_ndarray_types(); + int first_type_id = array_types.typenum_to_lookup_id(arr1->get_typenum()); + int second_type_id = array_types.typenum_to_lookup_id(arr2->get_typenum()); + + if (first_type_id != second_type_id) { + py::dtype first_dtype = ext::common::dtype_from_typenum(first_type_id); + py::dtype second_dtype = + ext::common::dtype_from_typenum(second_type_id); + + std::string msg = "Arrays " + name_of(arr1, names) + " and " + + name_of(arr2, names) + + " must have the same dtype, but got " + + std::string(py::str(first_dtype)) + " and " + + std::string(py::str(second_dtype)); + + throw py::value_error(msg); + } +} + +inline void check_same_dtype(const std::vector &arrays, + const array_names &names) +{ + if (arrays.size() < 2) { + return; + } + + const auto *first = arrays[0]; + for (size_t i = 1; i < arrays.size(); ++i) { + check_same_dtype(first, arrays[i], names); + } +} + inline void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names) diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index 53b71b07e427..a246559066d1 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -64,6 +64,10 @@ void check_size_at_least(const array_ptr &arr, const size_t size, const array_names &names); +void check_same_dtype(const array_ptr &arr1, + const array_ptr &arr2, + const array_names 
&names); + void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 5ad3463a2169..fd9f2b98fd78 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -46,6 +46,7 @@ namespace type_utils = dpctl::tensor::type_utils; using ext::common::value_type_of; using ext::validation::array_names; using ext::validation::array_ptr; +using ext::validation::check_same_dtype; using ext::validation::common_checks; namespace dpnp::extensions::ufunc @@ -113,19 +114,13 @@ void common_interpolate_checks( { array_names names = {{&x, "x"}, {&xp, "xp"}, {&fp, "fp"}, {&out, "out"}}; + check_same_dtype(&x, &xp, names); + check_same_dtype(&fp, &out, names); + auto array_types = td_ns::usm_ndarray_types(); - int x_type_id = array_types.typenum_to_lookup_id(x.get_typenum()); int idx_type_id = array_types.typenum_to_lookup_id(idx.get_typenum()); - int xp_type_id = array_types.typenum_to_lookup_id(xp.get_typenum()); int fp_type_id = array_types.typenum_to_lookup_id(fp.get_typenum()); - int out_type_id = array_types.typenum_to_lookup_id(out.get_typenum()); - if (x_type_id != xp_type_id) { - throw py::value_error("x and xp must have the same dtype"); - } - if (fp_type_id != out_type_id) { - throw py::value_error("fp and out must have the same dtype"); - } if (idx_type_id != static_cast(td_ns::typenum_t::INT64)) { throw py::value_error("The type of idx must be int64"); } From cbf49d4700ac82ed15667fce6e3a6aa7ee199e04 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 06:18:21 -0700 Subject: [PATCH 33/37] Add check_has_dtype() to validation_utils.hpp --- .../ext/details/validation_utils_internal.hpp | 34 ++++++++++++++++--- .../common/ext/validation_utils.hpp | 5 +++ 
.../elementwise_functions/interpolate.cpp | 7 ++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp index 680a9df3253f..9f054fd0cc86 100644 --- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp @@ -31,6 +31,9 @@ #include "ext/validation_utils.hpp" #include "utils/memory_overlap.hpp" +namespace td_ns = dpctl::tensor::type_dispatch; +namespace common = ext::common; + namespace ext::validation { inline sycl::queue get_queue(const std::vector &inputs, @@ -168,6 +171,30 @@ inline void check_size_at_least(const array_ptr &arr, } } +inline void check_has_dtype(const array_ptr &arr, + const typenum_t dtype, + const array_names &names) +{ + if (arr == nullptr) { + return; + } + + auto array_types = td_ns::usm_ndarray_types(); + int array_type_id = array_types.typenum_to_lookup_id(arr->get_typenum()); + int expected_type_id = static_cast(dtype); + + if (array_type_id != expected_type_id) { + py::dtype actual_dtype = common::dtype_from_typenum(array_type_id); + py::dtype dtype_py = common::dtype_from_typenum(expected_type_id); + + std::string msg = "Array " + name_of(arr, names) + " must have dtype " + + std::string(py::str(dtype_py)) + ", but got " + + std::string(py::str(actual_dtype)); + + throw py::value_error(msg); + } +} + inline void check_same_dtype(const array_ptr &arr1, const array_ptr &arr2, const array_names &names) @@ -176,14 +203,13 @@ inline void check_same_dtype(const array_ptr &arr1, return; } - auto array_types = dpctl::tensor::type_dispatch::usm_ndarray_types(); + auto array_types = td_ns::usm_ndarray_types(); int first_type_id = array_types.typenum_to_lookup_id(arr1->get_typenum()); int second_type_id = array_types.typenum_to_lookup_id(arr2->get_typenum()); if (first_type_id != second_type_id) { 
- py::dtype first_dtype = ext::common::dtype_from_typenum(first_type_id); - py::dtype second_dtype = - ext::common::dtype_from_typenum(second_type_id); + py::dtype first_dtype = common::dtype_from_typenum(first_type_id); + py::dtype second_dtype = common::dtype_from_typenum(second_type_id); std::string msg = "Arrays " + name_of(arr1, names) + " and " + name_of(arr2, names) + diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index a246559066d1..9e08ad3afe04 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -35,6 +35,7 @@ namespace ext::validation { using array_ptr = const dpctl::tensor::usm_ndarray *; using array_names = std::unordered_map; +using dpctl::tensor::type_dispatch::typenum_t; std::string name_of(const array_ptr &arr, const array_names &names); @@ -64,6 +65,10 @@ void check_size_at_least(const array_ptr &arr, const size_t size, const array_names &names); +void check_has_dtype(const array_ptr &arr, + const typenum_t dtype, + const array_names &names); + void check_same_dtype(const array_ptr &arr1, const array_ptr &arr2, const array_names &names); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index fd9f2b98fd78..2a3465c186d3 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -46,6 +46,7 @@ namespace type_utils = dpctl::tensor::type_utils; using ext::common::value_type_of; using ext::validation::array_names; using ext::validation::array_ptr; +using ext::validation::check_has_dtype; using ext::validation::check_same_dtype; using ext::validation::common_checks; @@ -116,15 +117,11 @@ void common_interpolate_checks( check_same_dtype(&x, &xp, names); check_same_dtype(&fp, &out, names); + 
check_has_dtype(&idx, td_ns::typenum_t::INT64, names); auto array_types = td_ns::usm_ndarray_types(); - int idx_type_id = array_types.typenum_to_lookup_id(idx.get_typenum()); int fp_type_id = array_types.typenum_to_lookup_id(fp.get_typenum()); - if (idx_type_id != static_cast(td_ns::typenum_t::INT64)) { - throw py::value_error("The type of idx must be int64"); - } - auto left_v = left ? &left.value() : nullptr; if (left_v) { names.insert({left_v, "left"}); From fa5d07aabafbb89381ce41ffdee78f0491dc216c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 06:24:13 -0700 Subject: [PATCH 34/37] Use check_num_dims for left/right --- .../ufunc/elementwise_functions/interpolate.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 2a3465c186d3..01dc4e203785 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -47,6 +47,7 @@ using ext::common::value_type_of; using ext::validation::array_names; using ext::validation::array_ptr; using ext::validation::check_has_dtype; +using ext::validation::check_num_dims; using ext::validation::check_same_dtype; using ext::validation::common_checks; @@ -125,9 +126,7 @@ void common_interpolate_checks( auto left_v = left ? &left.value() : nullptr; if (left_v) { names.insert({left_v, "left"}); - if (left_v->get_ndim() != 0) { - throw py::value_error("left must be a zero-dimensional array"); - } + check_num_dims(left_v, 0, names); int left_type_id = array_types.typenum_to_lookup_id(left_v->get_typenum()); @@ -140,9 +139,7 @@ void common_interpolate_checks( auto right_v = right ? 
&right.value() : nullptr; if (right_v) { names.insert({right_v, "right"}); - if (right_v->get_ndim() != 0) { - throw py::value_error("right must be a zero-dimensional array"); - } + check_num_dims(right_v, 0, names); int right_type_id = array_types.typenum_to_lookup_id(right_v->get_typenum()); From 0a4fdff1117b92dee054d578388c36e999707859 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 06:26:31 -0700 Subject: [PATCH 35/37] Use check_same_dtype for left/right --- .../elementwise_functions/interpolate.cpp | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 01dc4e203785..4a6a2c5a4e71 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -120,33 +120,18 @@ void common_interpolate_checks( check_same_dtype(&fp, &out, names); check_has_dtype(&idx, td_ns::typenum_t::INT64, names); - auto array_types = td_ns::usm_ndarray_types(); - int fp_type_id = array_types.typenum_to_lookup_id(fp.get_typenum()); - auto left_v = left ? &left.value() : nullptr; if (left_v) { names.insert({left_v, "left"}); check_num_dims(left_v, 0, names); - - int left_type_id = - array_types.typenum_to_lookup_id(left_v->get_typenum()); - if (left_type_id != fp_type_id) { - throw py::value_error( - "left must have the same dtype as fp and out"); - } + check_same_dtype(left_v, &fp, names); } auto right_v = right ? 
&right.value() : nullptr; if (right_v) { names.insert({right_v, "right"}); check_num_dims(right_v, 0, names); - - int right_type_id = - array_types.typenum_to_lookup_id(right_v->get_typenum()); - if (right_type_id != fp_type_id) { - throw py::value_error( - "right must have the same dtype as fp and out"); - } + check_same_dtype(right_v, &fp, names); } common_checks({&x, &xp, &fp, left_v, right_v}, {&out}, names); From f368c17e24359bcd478d9bfba9570c4af645d777 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 06:38:16 -0700 Subject: [PATCH 36/37] Add vector version of check_num_dims to validation_utils.hpp --- .../common/ext/details/validation_utils_internal.hpp | 9 +++++++++ dpnp/backend/extensions/common/ext/validation_utils.hpp | 3 +++ .../ufunc/elementwise_functions/interpolate.cpp | 6 +----- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp index 9f054fd0cc86..ac3b6ac3201b 100644 --- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp @@ -145,6 +145,15 @@ inline void check_num_dims(const array_ptr &arr, } } +inline void check_num_dims(const std::vector &arrays, + const size_t ndim, + const array_names &names) +{ + for (const auto &arr : arrays) { + check_num_dims(arr, ndim, names); + } +} + inline void check_max_dims(const array_ptr &arr, const size_t max_ndim, const array_names &names) diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index 9e08ad3afe04..03d324f6d06d 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -57,6 +57,9 @@ void check_no_overlap(const std::vector &inputs, void check_num_dims(const array_ptr
&arr, const size_t ndim, const array_names &names); +void check_num_dims(const std::vector &arrays, + const size_t ndim, + const array_names &names); void check_max_dims(const array_ptr &arr, const size_t max_ndim, const array_names &names); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 4a6a2c5a4e71..0b71002450da 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -136,11 +136,7 @@ void common_interpolate_checks( common_checks({&x, &xp, &fp, left_v, right_v}, {&out}, names); - if (x.get_ndim() != 1 || xp.get_ndim() != 1 || fp.get_ndim() != 1 || - idx.get_ndim() != 1 || out.get_ndim() != 1) - { - throw py::value_error("All arrays must be one-dimensional"); - } + check_num_dims({&x, &xp, &fp, &idx, &out}, 1, names); if (xp.get_size() != fp.get_size()) { throw py::value_error("xp and fp must have the same size"); From a7d2f505daeae127a97a0c71d30cc85dd1c453d4 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Apr 2025 07:36:56 -0700 Subject: [PATCH 37/37] Add check_same_size to validation_utils.hpp --- .../ext/details/validation_utils_internal.hpp | 36 ++++++++++++++++++- .../common/ext/validation_utils.hpp | 6 ++++ .../elementwise_functions/interpolate.cpp | 11 +++--- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp index ac3b6ac3201b..2ff800aced59 100644 --- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp @@ -233,7 +233,7 @@ inline void check_same_dtype(const array_ptr &arr1, inline void check_same_dtype(const std::vector &arrays, const array_names &names) { - if 
(arrays.size() < 2) { + if (arrays.empty()) { return; } @@ -243,6 +243,40 @@ inline void check_same_dtype(const std::vector &arrays, } } +inline void check_same_size(const array_ptr &arr1, + const array_ptr &arr2, + const array_names &names) +{ + if (arr1 == nullptr || arr2 == nullptr) { + return; + } + + auto size1 = arr1->get_size(); + auto size2 = arr2->get_size(); + + if (size1 != size2) { + std::string msg = + "Arrays " + name_of(arr1, names) + " and " + name_of(arr2, names) + + " must have the same size, but got " + std::to_string(size1) + + " and " + std::to_string(size2); + + throw py::value_error(msg); + } +} + +inline void check_same_size(const std::vector &arrays, + const array_names &names) +{ + if (arrays.empty()) { + return; + } + + auto first = arrays[0]; + for (size_t i = 1; i < arrays.size(); ++i) { + check_same_size(first, arrays[i], names); + } +} + inline void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names) diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp index 03d324f6d06d..291ff423e3ce 100644 --- a/dpnp/backend/extensions/common/ext/validation_utils.hpp +++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp @@ -76,6 +76,12 @@ void check_same_dtype(const array_ptr &arr1, const array_ptr &arr2, const array_names &names); +void check_same_size(const array_ptr &arr1, + const array_ptr &arr2, + const array_names &names); +void check_same_size(const std::vector &arrays, + const array_names &names); + void common_checks(const std::vector &inputs, const std::vector &outputs, const array_names &names); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 0b71002450da..096cb16ba61d 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ 
b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -46,9 +46,11 @@ namespace type_utils = dpctl::tensor::type_utils; using ext::common::value_type_of; using ext::validation::array_names; using ext::validation::array_ptr; + using ext::validation::check_has_dtype; using ext::validation::check_num_dims; using ext::validation::check_same_dtype; +using ext::validation::check_same_size; using ext::validation::common_checks; namespace dpnp::extensions::ufunc @@ -138,17 +140,12 @@ void common_interpolate_checks( check_num_dims({&x, &xp, &fp, &idx, &out}, 1, names); - if (xp.get_size() != fp.get_size()) { - throw py::value_error("xp and fp must have the same size"); - } + check_same_size(&xp, &fp, names); + check_same_size({&x, &idx, &out}, names); if (xp.get_size() == 0) { throw py::value_error("array of sample points is empty"); } - - if (x.get_size() != out.get_size() || x.get_size() != idx.get_size()) { - throw py::value_error("x, idx, and out must have the same size"); - } } std::pair