Skip to content

Commit e38c2db

Browse files
committed
da.asarray should not materialize the graph
1 parent adbb6ef commit e38c2db

File tree

4 files changed

+130
-27
lines changed

4 files changed

+130
-27
lines changed

array_api_compat/dask/array/_aliases.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -144,24 +144,23 @@ def asarray(
144144
See the corresponding documentation in the array library and/or the array API
145145
specification for more details.
146146
"""
147+
if isinstance(obj, da.Array):
148+
if dtype is not None and dtype != obj.dtype:
149+
if copy is False:
150+
raise ValueError("Unable to avoid copy when changing dtype")
151+
obj = obj.astype(dtype)
152+
return obj.copy() if copy else obj
153+
147154
if copy is False:
148-
# copy=False is not yet implemented in dask
149-
raise NotImplementedError("copy=False is not yet implemented")
150-
elif copy is True:
151-
if isinstance(obj, da.Array) and dtype is None:
152-
return obj.copy()
153-
# Go through numpy, since dask copy is no-op by default
154-
obj = np.array(obj, dtype=dtype, copy=True)
155-
return da.array(obj, dtype=dtype)
156-
else:
157-
if not isinstance(obj, da.Array) or dtype is not None and obj.dtype != dtype:
158-
# copy=True to be uniform across dask < 2024.12 and >= 2024.12
159-
# see https://github.com/dask/dask/pull/11524/
160-
obj = np.array(obj, dtype=dtype, copy=True)
161-
return da.from_array(obj)
162-
return obj
163-
164-
return da.asarray(obj, dtype=dtype, **kwargs)
155+
raise NotImplementedError(
156+
"Unable to avoid copy when converting a non-dask object to dask"
157+
)
158+
159+
# Always pass copy=True to np.array so that copy=None behaves uniformly across dask < 2024.12 and >= 2024.12
160+
# see https://github.com/dask/dask/pull/11524/
161+
obj = np.array(obj, dtype=dtype, copy=True)
162+
return da.from_array(obj)
163+
165164

166165
from dask.array import (
167166
# Element wise aliases

tests/test_all.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,7 @@ def test_all(library):
4040
all_names = module.__all__
4141

4242
if set(dir_names) != set(all_names):
43-
assert set(dir_names) - set(all_names) == set(), f"Some dir() names not included in __all__ for {mod_name}"
44-
assert set(all_names) - set(dir_names) == set(), f"Some __all__ names not in dir() for {mod_name}"
43+
extra_dir = set(dir_names) - set(all_names)
44+
extra_all = set(all_names) - set(dir_names)
45+
assert not extra_dir, f"Some dir() names not included in __all__ for {mod_name}: {extra_dir}"
46+
assert not extra_all, f"Some __all__ names not in dir() for {mod_name}: {extra_all}"

tests/test_common.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,17 @@ def test_asarray_copy(library):
226226
all = xp.all if library != 'dask.array' else lambda x: xp.all(x).compute()
227227

228228
if library == 'numpy' and xp.__version__[0] < '2' and not hasattr(xp, '_CopyMode') :
229-
supports_copy_false = False
230-
elif library in ['cupy', 'dask.array']:
231-
supports_copy_false = False
229+
supports_copy_false_other_ns = False
230+
supports_copy_false_same_ns = False
231+
elif library == 'cupy':
232+
supports_copy_false_other_ns = False
233+
supports_copy_false_same_ns = False
234+
elif library == 'dask.array':
235+
supports_copy_false_other_ns = False
236+
supports_copy_false_same_ns = True
232237
else:
233-
supports_copy_false = True
238+
supports_copy_false_other_ns = True
239+
supports_copy_false_same_ns = True
234240

235241
a = asarray([1])
236242
b = asarray(a, copy=True)
@@ -240,7 +246,7 @@ def test_asarray_copy(library):
240246
assert all(a[0] == 0)
241247

242248
a = asarray([1])
243-
if supports_copy_false:
249+
if supports_copy_false_same_ns:
244250
b = asarray(a, copy=False)
245251
assert is_lib_func(b)
246252
a[0] = 0
@@ -249,7 +255,7 @@ def test_asarray_copy(library):
249255
pytest.raises(NotImplementedError, lambda: asarray(a, copy=False))
250256

251257
a = asarray([1])
252-
if supports_copy_false:
258+
if supports_copy_false_same_ns:
253259
pytest.raises(ValueError, lambda: asarray(a, copy=False,
254260
dtype=xp.float64))
255261
else:
@@ -281,7 +287,7 @@ def test_asarray_copy(library):
281287
for obj in [True, 0, 0.0, 0j, [0], [[0]]]:
282288
asarray(obj, copy=True) # No error
283289
asarray(obj, copy=None) # No error
284-
if supports_copy_false:
290+
if supports_copy_false_other_ns:
285291
pytest.raises(ValueError, lambda: asarray(obj, copy=False))
286292
else:
287293
pytest.raises(NotImplementedError, lambda: asarray(obj, copy=False))
@@ -294,7 +300,7 @@ def test_asarray_copy(library):
294300
assert all(b[0] == 1.0)
295301

296302
a = array.array('f', [1.0])
297-
if supports_copy_false:
303+
if supports_copy_false_other_ns:
298304
b = asarray(a, copy=False)
299305
assert is_lib_func(b)
300306
a[0] = 0.0

tests/test_dask.py

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import dask
2+
import numpy as np
3+
import pytest
4+
import dask.array as da
5+
6+
from array_api_compat import array_namespace
7+
8+
9+
@pytest.fixture
10+
def xp():
11+
"""Fixture returning the wrapped dask namespace"""
12+
return array_namespace(da.empty(0))
13+
14+
15+
@pytest.fixture
16+
def no_compute():
17+
"""
18+
Cause the test to raise if anything calls compute() or persist() at any point,
19+
including implicitly, e.g. through __bool__, __array__, etc.
20+
"""
21+
def get(dsk, *args, **kwargs):
22+
raise AssertionError("Called compute() or persist()")
23+
24+
with dask.config.set(scheduler=get):
25+
yield
26+
27+
28+
def test_no_compute(no_compute):
29+
"""Test the no_compute fixture"""
30+
a = da.asarray(True)
31+
with pytest.raises(AssertionError, match="Called compute"):
32+
bool(a)
33+
34+
35+
# Test no_compute for functions that use generic _aliases with xp=np
36+
37+
def test_unary_ops_no_compute(xp, no_compute):
38+
a = xp.asarray([1.5, -1.5])
39+
xp.ceil(a)
40+
xp.floor(a)
41+
xp.trunc(a)
42+
xp.sign(a)
43+
44+
45+
def test_matmul_tensordot_no_compute(xp, no_compute):
46+
A = da.ones((4, 4), chunks=2)
47+
B = da.zeros((4, 4), chunks=2)
48+
xp.matmul(A, B)
49+
xp.tensordot(A, B)
50+
51+
52+
# Test no_compute for functions that are fully bespoke for dask
53+
54+
def test_asarray_no_compute(xp, no_compute):
55+
a = xp.arange(10)
56+
xp.asarray(a)
57+
xp.asarray(a, dtype=np.int16)
58+
xp.asarray(a, dtype=a.dtype)
59+
xp.asarray(a, copy=True)
60+
xp.asarray(a, copy=True, dtype=np.int16)
61+
xp.asarray(a, copy=True, dtype=a.dtype)
62+
xp.asarray(a, copy=False)
63+
xp.asarray(a, copy=False, dtype=a.dtype)
64+
65+
66+
@pytest.mark.parametrize("copy", [True, False])
67+
def test_astype_no_compute(xp, no_compute, copy):
68+
a = xp.arange(10)
69+
xp.astype(a, np.int16, copy=copy)
70+
xp.astype(a, a.dtype, copy=copy)
71+
72+
73+
def test_clip_no_compute(xp, no_compute):
74+
a = xp.arange(10)
75+
xp.clip(a)
76+
xp.clip(a, 1)
77+
xp.clip(a, 1, 8)
78+
79+
80+
def test_generators_are_lazy(xp, no_compute):
81+
"""
82+
Test that generator functions are fully lazy, e.g. that
83+
da.ones(n) is not implemented as da.asarray(np.ones(n))
84+
"""
85+
size = 100_000_000_000 # 800 GB
86+
chunks = size // 10
87+
88+
xp.zeros(size, chunks=chunks)
89+
xp.ones(size, chunks=chunks)
90+
xp.empty(size, chunks=chunks)
91+
xp.full(size, fill_value=123, chunks=chunks)
92+
a = xp.arange(size, chunks=chunks)
93+
xp.zeros_like(a)
94+
xp.ones_like(a)
95+
xp.empty_like(a)
96+
xp.full_like(a, fill_value=123)

0 commit comments

Comments
 (0)