Skip to content

Add order parameter to to_dense #94

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/fast_array_utils/conv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,28 @@


@overload
def to_dense(x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]: ...
def to_dense(
x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False
) -> NDArray[Any]: ...


@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.DaskArray: ...
@overload
def to_dense(x: types.DaskArray, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[False] = False) -> types.CupyArray: ...
@overload
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, to_cpu_memory: Literal[True]) -> NDArray[Any]: ...
def to_dense(x: GpuArray | types.CupySpMatrix, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: Literal[True]) -> NDArray[Any]: ...


def to_dense(
x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.DaskArray | types.CupyArray:
r"""Convert x to a dense array.
Expand All @@ -52,6 +55,8 @@ def to_dense(
----------
x
Input object to be converted.
order
The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``.
to_cpu_memory
Also load data into memory (resulting in a :class:`numpy.ndarray`).

Expand All @@ -60,4 +65,4 @@ def to_dense(
Dense form of ``x``

"""
return to_dense_(x, to_cpu_memory=to_cpu_memory)
return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory)
34 changes: 24 additions & 10 deletions src/fast_array_utils/conv/_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


if TYPE_CHECKING:
from typing import Any
from typing import Any, Literal

from numpy.typing import NDArray

Expand All @@ -22,40 +22,54 @@ def to_dense_(
x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
/,
*,
order: Literal["K", "A", "C", "F"] = "K",
to_cpu_memory: bool = False,
) -> NDArray[Any] | types.CupyArray | types.DaskArray:
del to_cpu_memory # it already is
return np.asarray(x)
return np.asarray(x, order=order)


@to_dense_.register(types.spmatrix | types.sparray) # type: ignore[call-overload,misc]
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_cs(x: types.spmatrix | types.sparray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import scipy

del to_cpu_memory # it already is
return scipy.to_dense(x)
return scipy.to_dense(x, order=sparse_order(x, order=order))


@to_dense_.register(types.DaskArray)
def _to_dense_dask(x: types.DaskArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
def _to_dense_dask(x: types.DaskArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.DaskArray:
from . import to_dense

x = x.map_blocks(partial(to_dense, to_cpu_memory=to_cpu_memory))
x = x.map_blocks(partial(to_dense, order=order, to_cpu_memory=to_cpu_memory))
return x.compute() if to_cpu_memory else x # type: ignore[return-value]


@to_dense_.register(types.CSDataset)
def _to_dense_ooc(x: types.CSDataset, /, *, to_cpu_memory: bool = False) -> NDArray[Any]:
def _to_dense_ooc(x: types.CSDataset, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any]:
from . import to_dense

if not to_cpu_memory:
msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset"
raise ValueError(msg)
# TODO(flying-sheep): why is to_memory of type Any? # noqa: TD003
return to_dense(cast("types.CSBase", x.to_memory()))
return to_dense(cast("types.CSBase", x.to_memory()), order=sparse_order(x, order=order))


@to_dense_.register(types.CupyArray | types.CupySpMatrix) # type: ignore[call-overload,misc]
def _to_dense_cupy(x: GpuArray, /, *, to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
x = x.toarray() if isinstance(x, types.CupySpMatrix) else x
def _to_dense_cupy(x: GpuArray, /, *, order: Literal["K", "A", "C", "F"] = "K", to_cpu_memory: bool = False) -> NDArray[Any] | types.CupyArray:
import cupy as cu

x = x.toarray(sparse_order(x, order=order)) if isinstance(x, types.CupySpMatrix) else cu.asarray(x, order=order)
return x.get() if to_cpu_memory else x


def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]:
    """Resolve ``order`` to a concrete memory layout for the sparse container ``x``.

    Parameters
    ----------
    x
        Sparse container whose ``format`` attribute is consulted.
    order
        Requested order. ``"C"`` and ``"F"`` are returned unchanged;
        ``"K"`` and ``"A"`` are derived from ``x.format``.

    Returns
    -------
    ``"F"`` if the order is derived and ``x.format`` is ``"csc"``,
    ``"C"`` for every other derived case, otherwise ``order`` itself.

    """
    if TYPE_CHECKING:
        from scipy.sparse._base import _spbase

        # Only narrows the type of ``x`` for the type checker; never executed at runtime.
        assert isinstance(x, _spbase | types.CSDataset)

    # An explicit layout needs no lookup on ``x``.
    if order not in {"K", "A"}:
        return cast("Literal['C', 'F']", order)
    return "F" if x.format == "csc" else "C"
42 changes: 36 additions & 6 deletions tests/test_to_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from contextlib import nullcontext
from importlib.util import find_spec
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

import numpy as np
import pytest
Expand All @@ -25,26 +25,33 @@


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense(array_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
if not to_cpu_memory and array_type.cls in {types.CSCDataset, types.CSRDataset}:
with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"):
to_dense(x, to_cpu_memory=to_cpu_memory)
to_dense(x, order=order, to_cpu_memory=to_cpu_memory)
return

with WARNS_NUMBA if issubclass(array_type.cls, types.CSBase) and not find_spec("numba") else nullcontext():
arr = to_dense(x, to_cpu_memory=to_cpu_memory)
arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
assert_expected_order(x, arr, order=order)


@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, to_cpu_memory: bool) -> None:
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"]) # “A” behaves like “K”
def test_to_dense_extra(coo_matrix_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)

with WARNS_NUMBA if not find_spec("numba") else nullcontext():
arr = to_dense(src_mtx, to_cpu_memory=to_cpu_memory)
arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory)

assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory)
assert arr.shape == (2, 3)
assert_expected_order(src_mtx, arr, order=order)


def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -> None:
Expand All @@ -56,3 +63,26 @@ def assert_expected_cls(orig: Array, converted: Array, *, to_cpu_memory: bool) -
assert isinstance(converted, types.CupyArray)
case _:
assert isinstance(converted, np.ndarray)


def assert_expected_order(orig: Array, converted: Array, *, order: Literal["K", "C", "F"]) -> None:
    """Assert that ``converted`` has the memory layout requested via ``order``.

    Parameters
    ----------
    orig
        The array that was passed to ``to_dense``.
    converted
        The result of ``to_dense``.
    order
        The order that was requested. For ``"K"`` the expected layout is
        whatever ``get_order(orig)`` reports; ``"C"`` and ``"F"`` are expected verbatim.

    """
    if isinstance(converted, types.DaskArray):
        # TODO: check the layout of the individual blocks
        # Return before resolving the expected order: ``get_order`` has no case for
        # Dask arrays and would raise ``NotImplementedError`` when ``order="K"``.
        return

    order_expected = get_order(orig) if order == "K" else order
    assert converted.flags.c_contiguous == (order_expected == "C")
    assert converted.flags.f_contiguous == (order_expected == "F")


def get_order(orig: Array) -> Literal["C", "F"]:
match orig:
case np.ndarray() | types.CupyArray():
return "C" if orig.flags["C_CONTIGUOUS"] else "F"
case types.spmatrix | types.CupySpMatrix() | types.CSCDataset() | types.CSRDataset():
if TYPE_CHECKING:
from scipy.sparse._base import _spbase

assert isinstance(orig, _spbase | types.CSDataset)

return "C" if orig.format == "csr" else "F"
raise NotImplementedError
Loading