diff --git a/docs/construct.rst b/docs/construct.rst index c2db5278..ea81b416 100644 --- a/docs/construct.rst +++ b/docs/construct.rst @@ -14,12 +14,12 @@ matrix: .. code-block:: python - import sparse + >>> import sparse - coords = [[0, 1, 2, 3, 4], - [0, 1, 2, 3, 4]] - data = [10, 20, 30, 40, 50] - s = sparse.COO(coords, data) + >>> coords = [[0, 1, 2, 3, 4], + ... [0, 1, 2, 3, 4]] + >>> data = [10, 20, 30, 40, 50] + >>> s = sparse.COO(coords, data, shape=(5, 5)) >>> s.todense() array([[10, 0, 0, 0, 0], @@ -28,13 +28,25 @@ matrix: [ 0, 0, 0, 40, 0], [ 0, 0, 0, 0, 50]]) - In general :code:`coords` should be a :code:`(ndim, nnz)` shaped array. Each row of :code:`coords` contains one dimension of the desired sparse array, and each column contains the index corresponding to that nonzero element. :code:`data` contains the nonzero elements of the array corresponding to the indices -in :code:`coords`. Its shape should be :code:`(nnz,)` +in :code:`coords`. Its shape should be :code:`(nnz,)`. + +If ``data`` is the same across all the coordinates, it can be passed +in as a scalar. For example, the following produces the :math:`4 \times 4` +identity matrix: + +.. code-block:: python + + >>> import sparse + + >>> coords = [[0, 1, 2, 3], + ... [0, 1, 2, 3]] + >>> data = 1 + >>> s = sparse.COO(coords, data, shape=(4, 4)) You can, and should, pass in :obj:`numpy.ndarray` objects for :code:`coords` and :code:`data`. diff --git a/docs/generated/sparse.COO.asformat.rst b/docs/generated/sparse.COO.asformat.rst new file mode 100644 index 00000000..e4bb9efe --- /dev/null +++ b/docs/generated/sparse.COO.asformat.rst @@ -0,0 +1,6 @@ +COO.asformat +============ + +.. currentmodule:: sparse + +.. automethod:: COO.asformat \ No newline at end of file diff --git a/docs/generated/sparse.COO.from_iter.rst b/docs/generated/sparse.COO.from_iter.rst new file mode 100644 index 00000000..f5f3d164 --- /dev/null +++ b/docs/generated/sparse.COO.from_iter.rst @@ -0,0 +1,6 @@ +COO.from\_iter +============== + +.. currentmodule:: sparse + +.. automethod:: COO.from_iter \ No newline at end of file diff --git a/docs/generated/sparse.COO.rst b/docs/generated/sparse.COO.rst index d2438ab7..d0a1c9f2 100644 --- a/docs/generated/sparse.COO.rst +++ b/docs/generated/sparse.COO.rst @@ -24,6 +24,7 @@ COO .. autosummary:: :toctree: + COO.from_iter COO.from_numpy COO.from_scipy_sparse @@ -48,6 +49,7 @@ COO .. autosummary:: :toctree: + COO.asformat COO.todense COO.maybe_densify COO.to_scipy_sparse diff --git a/docs/generated/sparse.DOK.asformat.rst b/docs/generated/sparse.DOK.asformat.rst new file mode 100644 index 00000000..bdd0480a --- /dev/null +++ b/docs/generated/sparse.DOK.asformat.rst @@ -0,0 +1,6 @@ +DOK.asformat +============ + +.. currentmodule:: sparse + +.. automethod:: DOK.asformat \ No newline at end of file diff --git a/docs/generated/sparse.DOK.rst b/docs/generated/sparse.DOK.rst index 80f6ef16..e9450e3e 100644 --- a/docs/generated/sparse.DOK.rst +++ b/docs/generated/sparse.DOK.rst @@ -31,6 +31,8 @@ DOK .. rubric:: Methods .. autosummary:: :toctree: + + DOK.asformat DOK.from_coo diff --git a/docs/generated/sparse.SparseArray.asformat.rst b/docs/generated/sparse.SparseArray.asformat.rst new file mode 100644 index 00000000..9b9e7878 --- /dev/null +++ b/docs/generated/sparse.SparseArray.asformat.rst @@ -0,0 +1,6 @@ +SparseArray.asformat +==================== + +.. currentmodule:: sparse + +.. automethod:: SparseArray.asformat \ No newline at end of file diff --git a/docs/generated/sparse.SparseArray.rst b/docs/generated/sparse.SparseArray.rst index 382a1dc3..031e918e 100644 --- a/docs/generated/sparse.SparseArray.rst +++ b/docs/generated/sparse.SparseArray.rst @@ -14,3 +14,9 @@ SparseArray SparseArray.nnz SparseArray.size + .. rubric:: Methods + .. autosummary:: + :toctree: + + SparseArray.asformat + diff --git a/docs/generated/sparse.as_coo.rst b/docs/generated/sparse.as_coo.rst new file mode 100644 index 00000000..a39a2690 --- /dev/null +++ b/docs/generated/sparse.as_coo.rst @@ -0,0 +1,6 @@ +as\_coo +======= + +.. currentmodule:: sparse + +.. autofunction:: as_coo \ No newline at end of file diff --git a/docs/generated/sparse.rst b/docs/generated/sparse.rst index 451f65b3..36922bda 100644 --- a/docs/generated/sparse.rst +++ b/docs/generated/sparse.rst @@ -23,6 +23,8 @@ API .. autosummary:: :toctree: + as_coo + concatenate dot diff --git a/sparse/coo/__init__.py b/sparse/coo/__init__.py index c47dbe5e..13bf5652 100644 --- a/sparse/coo/__init__.py +++ b/sparse/coo/__init__.py @@ -1,4 +1,4 @@ -from .core import COO +from .core import COO, as_coo from .umath import elemwise from .common import tensordot, dot, concatenate, stack, triu, tril, where, \ nansum, nanprod, nanmin, nanmax, nanreduce diff --git a/sparse/coo/core.py b/sparse/coo/core.py index 33c427e2..2cda2d69 100644 --- a/sparse/coo/core.py +++ b/sparse/coo/core.py @@ -1,4 +1,4 @@ -from collections import Iterable, defaultdict, deque +from collections import Iterable, Iterator, Sized, defaultdict, deque import numpy as np import scipy.sparse @@ -23,9 +23,10 @@ class COO(SparseArray, NDArrayOperatorsMixin): ---------- coords : numpy.ndarray (COO.ndim, COO.nnz) An array holding the index locations of every value - Should have shape (number of dimensions, number of non-zeros) + Should have shape (number of dimensions, number of non-zeros). data : numpy.ndarray (COO.nnz,) - An array of Values + An array of Values. A scalar can also be supplied if the data is the same across + all coordinates. If not given, defers to :obj:`as_coo`. shape : tuple[int] (COO.ndim,) The shape of the array. has_duplicates : bool, optional @@ -52,6 +53,7 @@ class COO(SparseArray, NDArrayOperatorsMixin): See Also -------- DOK : A mostly write-only sparse array. + as_coo : Convert any given format to :obj:`COO`. Examples -------- @@ -128,6 +130,16 @@ class COO(SparseArray, NDArrayOperatorsMixin): >>> s4 + If the data is same across all coordinates, you can also specify a scalar. + + >>> coords = [[0, 0, 0, 1, 1], + ... [0, 1, 2, 0, 3], + ... [0, 3, 2, 0, 1]] + >>> data = 1 + >>> s5 = COO(coords, data, shape=(3, 4, 5)) + >>> s5 + + Following scipy.sparse conventions you can also pass these as a tuple with rows and columns @@ -158,14 +170,14 @@ class COO(SparseArray, NDArrayOperatorsMixin): You can convert :obj:`DOK` arrays to :obj:`COO` arrays. >>> from sparse import DOK - >>> s5 = DOK((5, 5), dtype=np.int64) - >>> s5[1:3, 1:3] = [[4, 5], [6, 7]] - >>> s5 - - >>> s6 = COO(s5) + >>> s6 = DOK((5, 5), dtype=np.int64) + >>> s6[1:3, 1:3] = [[4, 5], [6, 7]] >>> s6 + + >>> s7 = s6.asformat('coo') + >>> s7 - >>> s6.todense() # doctest: +NORMALIZE_WHITESPACE + >>> s7.todense() # doctest: +NORMALIZE_WHITESPACE array([[0, 0, 0, 0, 0], [0, 4, 5, 0, 0], [0, 6, 7, 0, 0], @@ -179,61 +191,27 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True, self._cache = None if cache: self.enable_caching() + if data is None: - from ..dok import DOK - - if isinstance(coords, COO): - self._make_shallow_copy_of(coords) - return - - if isinstance(coords, DOK): - shape = coords.shape - coords = coords.data - - # {(i, j, k): x, (i, j, k): y, ...} - if isinstance(coords, dict): - coords = list(coords.items()) - has_duplicates = False - - if isinstance(coords, np.ndarray): - result = COO.from_numpy(coords) - self._make_shallow_copy_of(result) - return - - if isinstance(coords, scipy.sparse.spmatrix): - result = COO.from_scipy_sparse(coords) - self._make_shallow_copy_of(result) - return - - # [] - if not coords: - data = [] - coords = [] - - # [((i, j, k), value), (i, j, k), value), ...] - elif isinstance(coords[0][0], Iterable): - if coords: - assert len(coords[0]) == 2 - data = [x[1] for x in coords] - coords = [x[0] for x in coords] - coords = np.asarray(coords).T - - # (data, (row, col, slab, ...)) - else: - data = coords[0] - coords = np.stack(coords[1], axis=0) + arr = as_coo(coords, shape=shape) + self._make_shallow_copy_of(arr) + return self.data = np.asarray(data) self.coords = np.asarray(coords) + if self.coords.ndim == 1: self.coords = self.coords[None, :] + if self.data.ndim == 0: + self.data = np.broadcast_to(self.data, self.coords.shape[1]) + if shape and not self.coords.size: self.coords = np.zeros((len(shape), 0), dtype=np.uint64) if shape is None: if self.coords.nbytes: - shape = tuple((self.coords.max(axis=1) + 1).tolist()) + shape = tuple((self.coords.max(axis=1) + 1)) else: shape = () @@ -243,7 +221,8 @@ def __init__(self, coords, data=None, shape=None, has_duplicates=True, else: dtype = np.uint8 self.coords = self.coords.astype(dtype) - assert not self.shape or len(data) == self.coords.shape[1] + assert not self.shape or (len(self.data) == self.coords.shape[1] and + len(self.shape) == self.coords.shape[0]) if not sorted: self._sort_indices() @@ -278,7 +257,6 @@ def enable_caching(self): True """ self._cache = defaultdict(lambda: deque(maxlen=3)) - return self @classmethod def from_numpy(cls, x): @@ -372,7 +350,7 @@ def from_scipy_sparse(cls, x): >>> np.array_equal(x.todense(), s.todense()) True """ - x = scipy.sparse.coo_matrix(x) + x = x.asformat('coo') coords = np.empty((2, x.nnz), dtype=x.row.dtype) coords[0, :] = x.row coords[1, :] = x.col @@ -380,6 +358,89 @@ def from_scipy_sparse(cls, x): has_duplicates=not x.has_canonical_format, sorted=x.has_canonical_format) + @classmethod + def from_iter(cls, x, shape=None): + """ + Converts an iterable in certain formats to a :obj:`COO` array. See examples + for details. + + Parameters + ---------- + x : Iterable or Iterator + The iterable to convert to :obj:`COO`. + shape : tuple[int], optional + The shape of the array. + + Returns + ------- + out : COO + The output :obj:`COO` array. + + Examples + -------- + You can convert items of the format ``[((i, j, k), value), ((i, j, k), value)]`` to :obj:`COO`. + Here, the first part represents the coordinate and the second part represents the value. + + >>> x = [((0, 0), 1), ((1, 1), 1)] + >>> s = COO.from_iter(x) + >>> s.todense() + array([[1, 0], + [0, 1]]) + + You can also have a similar format with a dictionary. + + >>> x = {(0, 0): 1, (1, 1): 1} + >>> s = COO.from_iter(x) + >>> s.todense() + array([[1, 0], + [0, 1]]) + + The third supported format is ``(data, (..., row, col))``. + + >>> x = ([1, 1], ([0, 1], [0, 1])) + >>> s = COO.from_iter(x) + >>> s.todense() + array([[1, 0], + [0, 1]]) + + You can also pass in a :obj:`collections.Iterator` object. + + >>> x = [((0, 0), 1), ((1, 1), 1)].__iter__() + >>> s = COO.from_iter(x) + >>> s.todense() + array([[1, 0], + [0, 1]]) + """ + if isinstance(x, dict): + x = list(x.items()) + + if not isinstance(x, Sized): + x = list(x) + + if len(x) != 2 and not all(len(item) == 2 for item in x): + raise ValueError('Invalid iterable to convert to COO.') + + if not x: + ndim = 0 if shape is None else len(shape) + coords = np.empty((ndim, 0), dtype=np.uint8) + data = np.empty((0,)) + + return COO(coords, data, shape=() if shape is None else shape, + sorted=True, has_duplicates=False) + + if not isinstance(x[0][0], Iterable): + coords = np.stack(x[1], axis=0) + data = np.asarray(x[0]) + else: + coords = np.array([item[0] for item in x]).T + data = np.array([item[1] for item in x]) + + if not (coords.ndim == 2 and data.ndim == 1 and + np.issubdtype(coords.dtype, np.integer) and np.all(coords >= 0)): + raise ValueError('Invalid iterable to convert to COO.') + + return COO(coords, data, shape=shape) + @property def dtype(self): """ @@ -1465,6 +1526,77 @@ def nonzero(self): """ return tuple(self.coords) + def asformat(self, format): + """ + Convert this sparse array to a given format. + + Parameters + ---------- + format : str + A format string. + + Returns + ------- + out : SparseArray + The converted array. + + Raises + ------ + NotImplementedError + If the format isn't supported. + """ + if format == 'coo' or format is COO: + return self + + from ..dok import DOK + if format == 'dok' or format is DOK: + return DOK.from_coo(self) + + raise NotImplementedError('The given format is not supported.') + + +def as_coo(x, shape=None): + """ + Converts any given format to :obj:`COO`. See the "See Also" section for details. + + Parameters + ---------- + x : SparseArray or numpy.ndarray or scipy.sparse.spmatrix or Iterable. + The item to convert. + shape : tuple[int], optional + The shape of the output array. Can only be used in case of Iterable. + + Returns + ------- + out : COO + The converted :obj:`COO` array. + + See Also + -------- + SparseArray.asformat : A utility function to convert between formats in this library. + COO.from_numpy : Convert a Numpy array to :obj:`COO`. + COO.from_scipy_sparse : Convert a SciPy sparse matrix to :obj:`COO`. + COO.from_iter : Convert an iterable to :obj:`COO`. + """ + if hasattr(x, 'shape') and shape is not None: + raise ValueError('Cannot provide a shape in combination with something ' + 'that already has a shape.') + + if isinstance(x, SparseArray): + return x.asformat('coo') + + if isinstance(x, np.ndarray): + return COO.from_numpy(x) + + if isinstance(x, scipy.sparse.spmatrix): + return COO.from_scipy_sparse(x) + + if isinstance(x, (Iterable, Iterator)): + return COO.from_iter(x, shape=shape) + + raise NotImplementedError('Format not supported for conversion. Supplied type is ' + '%s, see help(sparse.as_coo) for supported formats.' % type(x)) + def _keepdims(original, new, axis): shape = list(original.shape) diff --git a/sparse/dok.py b/sparse/dok.py index c7695eed..33bb539a 100644 --- a/sparse/dok.py +++ b/sparse/dok.py @@ -343,3 +343,31 @@ def todense(self): result[c] = d return result + + def asformat(self, format): + """ + Convert this sparse array to a given format. + + Parameters + ---------- + format : str + A format string. + + Returns + ------- + out : SparseArray + The converted array. + + Raises + ------ + NotImplementedError + If the format isn't supported. + """ + if format == 'dok' or format is DOK: + return self + + from .coo import COO + if format == 'coo' or format is COO: + return COO.from_iter(self.data, shape=self.shape) + + raise NotImplementedError('The given format is not supported.') diff --git a/sparse/sparse_array.py b/sparse/sparse_array.py index 9e3f9fc7..3d05816d 100644 --- a/sparse/sparse_array.py +++ b/sparse/sparse_array.py @@ -20,7 +20,7 @@ class SparseArray(object): def __init__(self, shape): if not isinstance(shape, Iterable): - shape = (int(shape),) + shape = (shape,) if not all(isinstance(l, Integral) and int(l) >= 0 for l in shape): raise ValueError('shape must be an non-negative integer or a tuple ' @@ -144,3 +144,24 @@ def density(self): 0.125 """ return self.nnz / self.size + + @abstractmethod + def asformat(self, format): + """ + Convert this sparse array to a given format. + + Parameters + ---------- + format : str + A format string. + + Returns + ------- + out : SparseArray + The converted array. + + Raises + ------ + NotImplementedError + If the format isn't supported. + """ diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 21638928..d6973e86 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1487,3 +1487,50 @@ def test_argwhere(): x = s.todense() assert_eq(np.argwhere(s), np.argwhere(x), compare_dtype=False) + + +@pytest.mark.parametrize('format', [ + 'coo', + 'dok', +]) +def test_asformat(format): + s = sparse.random((2, 3, 4), density=0.5, format='coo') + s2 = s.asformat(format) + + assert_eq(s, s2) + + +@pytest.mark.parametrize('format', [ + sparse.COO, + sparse.DOK, + scipy.sparse.csr_matrix, + np.asarray +]) +def test_as_coo(format): + x = format(sparse.random((3, 4), density=0.5, format='coo').todense()) + + s1 = sparse.as_coo(x) + s2 = COO(x) + + assert_eq(x, s1) + assert_eq(x, s2) + + +def test_invalid_shape_error(): + s = sparse.random((3, 4), density=0.5, format='coo') + + with pytest.raises(ValueError): + sparse.as_coo(s, shape=(2, 3)) + + with pytest.raises(ValueError): + COO(s, shape=(2, 3)) + + +def test_invalid_iterable_error(): + with pytest.raises(ValueError): + x = [(3, 4, 5)] + COO.from_iter(x) + + with pytest.raises(ValueError): + x = [((2.3, 4.5), 3.2)] + COO.from_iter(x) diff --git a/sparse/tests/test_dok.py b/sparse/tests/test_dok.py index 45c98992..2443f33d 100644 --- a/sparse/tests/test_dok.py +++ b/sparse/tests/test_dok.py @@ -158,3 +158,14 @@ def test_set_zero(): assert s[0] == 0 assert s.nnz == 0 + + +@pytest.mark.parametrize('format', [ + 'coo', + 'dok', +]) +def test_asformat(format): + s = sparse.random((2, 3, 4), density=0.5, format='dok') + s2 = s.asformat(format) + + assert_eq(s, s2) diff --git a/sparse/utils.py b/sparse/utils.py index 894557f1..40e1c220 100644 --- a/sparse/utils.py +++ b/sparse/utils.py @@ -78,12 +78,12 @@ def random( Data generation callback. Must accept one single parameter: number of :code:`nnz` elements, and return one single NumPy array of exactly that length. - format: {'coo', 'dok'} + format: str The format to return the output array in. Returns ------- - {COO, DOK} + SparseArray The generated random matrix. See Also @@ -113,7 +113,6 @@ def random( # Copied, in large part, from scipy.sparse.random # See https://github.com/scipy/scipy/blob/master/LICENSE.txt from .coo import COO - from .dok import DOK elements = np.prod(shape) @@ -143,10 +142,7 @@ def random( ar = COO(ind[None, :], data, shape=nnz).reshape(shape) - if format == 'dok': - ar = DOK(ar) - - return ar + return ar.asformat(format) def isscalar(x):