-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Disabled auto-caching on dask; new .compute() method #1018
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
f3d74e8
26a6997
03fbdd1
b04167c
ca94cc7
91b8084
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
import pickle | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
@@ -12,44 +13,22 @@ | |
import dask.array as da | ||
|
||
|
||
def _copy_at_variable_level(arg): | ||
"""We need to copy the argument at the level of xarray.Variable objects, so | ||
that viewing its values does not trigger lazy loading. | ||
""" | ||
if isinstance(arg, Variable): | ||
return arg.copy(deep=False) | ||
elif isinstance(arg, DataArray): | ||
ds = arg.to_dataset(name='__copied__') | ||
return _copy_at_variable_level(ds)['__copied__'] | ||
elif isinstance(arg, Dataset): | ||
ds = arg.copy() | ||
for k in list(ds): | ||
ds._variables[k] = ds._variables[k].copy(deep=False) | ||
return ds | ||
else: | ||
assert False | ||
|
||
|
||
class DaskTestCase(TestCase): | ||
def assertLazyAnd(self, expected, actual, test): | ||
expected_copy = _copy_at_variable_level(expected) | ||
actual_copy = _copy_at_variable_level(actual) | ||
expected.name = None | ||
actual.name = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was there a failing tests that required setting this? I would rather disable this on a case by case basis than across the board (not all objects passed to this method even have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, on several cases the test was failing because on one side the object automatically got its name from the underlying dask.name, while on the other side it was blank. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you simply make a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed and resolved conflict |
||
with dask.set_options(get=dask.get): | ||
test(actual_copy, expected_copy) | ||
var = getattr(actual, 'variable', actual) | ||
self.assertIsInstance(var.data, da.Array) | ||
test(actual, expected) | ||
if isinstance(actual, Dataset): | ||
for var in actual.variables.values(): | ||
self.assertIsInstance(var.data, da.Array) | ||
else: | ||
var = getattr(actual, 'variable', actual) | ||
self.assertIsInstance(var.data, da.Array) | ||
|
||
|
||
@requires_dask | ||
class TestVariable(DaskTestCase): | ||
def assertLazyAnd(self, expected, actual, test): | ||
expected_copy = expected.copy(deep=False) | ||
actual_copy = actual.copy(deep=False) | ||
with dask.set_options(get=dask.get): | ||
test(actual_copy, expected_copy) | ||
var = getattr(actual, 'variable', actual) | ||
self.assertIsInstance(var.data, da.Array) | ||
|
||
def assertLazyAndIdentical(self, expected, actual): | ||
self.assertLazyAnd(expected, actual, self.assertVariableIdentical) | ||
|
||
|
@@ -321,3 +300,29 @@ def test_dot(self): | |
eager = self.eager_array.dot(self.eager_array[0]) | ||
lazy = self.lazy_array.dot(self.lazy_array[0]) | ||
self.assertLazyAndAllClose(eager, lazy) | ||
|
||
def test_dataarray_pickle(self): | ||
# Test that pickling/unpickling does not convert the dask | ||
# backend to numpy | ||
a1 = DataArray([1,2]).chunk() | ||
self.assertFalse(a1._in_memory) | ||
a2 = pickle.loads(pickle.dumps(a1)) | ||
self.assertDataArrayIdentical(a1, a2) | ||
self.assertFalse(a1._in_memory) | ||
self.assertFalse(a2._in_memory) | ||
|
||
def test_dataset_pickle(self): | ||
ds1 = Dataset({'a': [1,2]}).chunk() | ||
self.assertFalse(ds1['a']._in_memory) | ||
ds2 = pickle.loads(pickle.dumps(ds1)) | ||
self.assertDatasetIdentical(ds1, ds2) | ||
self.assertFalse(ds1['a']._in_memory) | ||
self.assertFalse(ds2['a']._in_memory) | ||
|
||
def test_values(self): | ||
# Test that invoking the values property does not convert the dask | ||
# backend to numpy | ||
a = DataArray([1,2]).chunk() | ||
self.assertFalse(a._in_memory) | ||
self.assertEquals(a.values.tolist(), [1, 2]) | ||
self.assertFalse(a._in_memory) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These methods would belong better in
test_dask.py
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should move test_dataarray_compute() to a different module as there's test_dataset_compute() that applies to all backends.
Moving the pickle and values tests.