From 80dc715886aef76971019cd5604b3fa703eebfea Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 29 Apr 2025 18:40:04 +0200 Subject: [PATCH] mostly works --- src/fast_array_utils/conv/__init__.py | 17 +++++++---- src/fast_array_utils/conv/_to_dense.py | 34 +++++++++++++++------ tests/test_to_dense.py | 42 ++++++++++++++++++++++---- 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/src/fast_array_utils/conv/__init__.py b/src/fast_array_utils/conv/__init__.py index fea40e9..afa818d 100644 --- a/src/fast_array_utils/conv/__init__.py +++ b/src/fast_array_utils/conv/__init__.py @@ -21,25 +21,28 @@ @overload -def to_dense(x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: ... +def to_dense( + x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False +) -> NDArray[Any]: ... @overload -def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[False] = False) -> types.DaskArray: ... +def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.DaskArray: ... @overload -def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ... +def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ... @overload -def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[False] = False) -> types.CupyArray: ... +def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.CupyArray: ... @overload -def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ... 
+def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ... def to_dense( x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix, /, *, + order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False, ) -> NDArray[Any] | types.DaskArray | types.CupyArray: r"""Convert x to a dense array. @@ -52,6 +55,8 @@ def to_dense( ---------- x Input object to be converted. + order + The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``. to_cpu_memory Also load data into memory (resulting in a :class:`numpy.ndarray`). @@ -60,4 +65,4 @@ def to_dense( Dense form of ``x`` """ - return to_dense_(x, to_cpu_memory=to_cpu_memory) + return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory) diff --git a/src/fast_array_utils/conv/_to_dense.py b/src/fast_array_utils/conv/_to_dense.py index 099995d..39ffdeb 100644 --- a/src/fast_array_utils/conv/_to_dense.py +++ b/src/fast_array_utils/conv/_to_dense.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: - from typing import Any + from typing import Any, Literal from numpy.typing import NDArray @@ -22,40 +22,54 @@ def to_dense_( x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix, /, *, + order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False, ) -> NDArray[Any] | types.CupyArray | types.DaskArray: del to_cpu_memory # it already is - return np.asarray(x) + return np.asarray(x, order=order) @to_dense_.register(types.spmatrix | types.sparray) # type: ignore[call-overload,misc] -def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: +def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]: from . 
import scipy del to_cpu_memory # it already is - return scipy.to_dense(x) + return scipy.to_dense(x, order=sparse_order(x, order=order)) @to_dense_.register(types.DaskArray) -def _to_dense_dask(x: types.DaskArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray: +def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray: from . import to_dense - x = x.map_blocks(partial(to_dense, to_cpu_memory=to_cpu_memory)) + x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory)) return x.compute() if to_cpu_memory else x # type: ignore[return-value] @to_dense_.register(types.CSDataset) -def _to_dense_ooc(x: types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: +def _to_dense_ooc(x: types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]: from . import to_dense if not to_cpu_memory: msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset" raise ValueError(msg) # TODO(flying-sheep): why is to_memory of type Any? 
# noqa: TD003 - return to_dense(cast("types.CSBase", x.to_memory())) + return to_dense(cast("types.CSBase", x.to_memory()), order=sparse_order(x, order=order)) @to_dense_.register(types.CupyArray | types.CupySpMatrix) # type: ignore[call-overload,misc] -def _to_dense_cupy(x: GpuArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray: - x = x.toarray() if isinstance(x, types.CupySpMatrix) else x +def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray: + import cupy as cu + + x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order) return x.get() if to_cpu_memory else x + + +def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]: + if TYPE_CHECKING: + from scipy.sparse._base import _spbase + + assert isinstance(x, _spbase | types.CSDataset) + + if order in {"K", "A"}: + order = "F" if x.format == "csc" else "C" + return cast("Literal['C', 'F']", order) diff --git a/tests/test_to_dense.py b/tests/test_to_dense.py index 119441c..1e97831 100644 --- a/tests/test_to_dense.py +++ b/tests/test_to_dense.py @@ -3,7 +3,7 @@ from contextlib import nullcontext from importlib.util import find_spec -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pytest @@ -25,26 +25,33 @@ @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) -def test_to_dense(array_type: ArrayType[Array], *, to_cpu_memory: bool) -> None: +@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K” +def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32) if not to_cpu_memory and array_type.cls in {types.CSCDataset, 
types.CSRDataset}: with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"): - to_dense(x, to_cpu_memory=to_cpu_memory) + to_dense(x, order=order, to_cpu_memory=to_cpu_memory) return with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext(): - arr = to_dense(x, to_cpu_memory=to_cpu_memory) + arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory) + assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory) assert arr.shape == (2, 3) + assert_expected_order(x, arr, order=order) @pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"]) -def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, to_cpu_memory: bool) -> None: +@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K” +def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None: src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + with WARNS_NUMBA if not find_spec("numba") else nullcontext(): - arr = to_dense(src_mtx, to_cpu_memory=to_cpu_memory) + arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory) + assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory) assert arr.shape == (2, 3) + assert_expected_order(src_mtx, arr, order=order) def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None: @@ -56,3 +63,26 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) - assert isinstance(converted, types.CupyArray) case _: assert isinstance(converted, np.ndarray) + + +def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None: + order_expected = get_order(orig) if order == "K" else order + if isinstance(converted, types.DaskArray): + pass # TODO + else: + assert converted.flags.c_contiguous == (order_expected == "C") + assert converted.flags.f_contiguous == 
(order_expected == "F")
+
+
+def get_order(orig: Array) -> Literal["C", "F"]:  # memory order that to_dense(order="K") should yield for `orig`
+    match orig:
+        case np.ndarray() | types.CupyArray():
+            return "C" if orig.flags["C_CONTIGUOUS"] else "F"
+        case types.spmatrix() | types.sparray() | types.CupySpMatrix() | types.CSCDataset() | types.CSRDataset():  # `()` makes these class patterns; a bare dotted name is an equality check
+            if TYPE_CHECKING:
+                from scipy.sparse._base import _spbase
+
+                assert isinstance(orig, _spbase | types.CSDataset)
+
+            return "F" if orig.format == "csc" else "C"  # same rule as sparse_order: only CSC maps to "F"
+    raise NotImplementedError