From f7842b3a29220d121d7bf2772e540bf2348128cb Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Fri, 1 Apr 2016 10:31:17 +0100 Subject: [PATCH 01/30] Added a first go at a converter from xarray.DataArrays objects to Iris.cube.Cube objects. Uses the same template as the cdms2 conversion. --- .travis.yml | 2 +- ... => requirements-py27-cdat+iris+pynio.yml} | 2 + xarray/convert.py | 111 ++++++++++++++++-- xarray/core/dataarray.py | 13 ++ xarray/test/test_dataarray.py | 44 +++++++ 5 files changed, 159 insertions(+), 13 deletions(-) rename ci/{requirements-py27-cdat+pynio.yml => requirements-py27-cdat+iris+pynio.yml} (89%) diff --git a/.travis.yml b/.travis.yml index 3af687b3f79..57da8637ed9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ matrix: - python: 2.7 env: CONDA_ENV=py27-min - python: 2.7 - env: CONDA_ENV=py27-cdat+pynio + env: CONDA_ENV=py27-cdat+iris+pynio - python: 3.4 env: CONDA_ENV=py34 - python: 3.5 diff --git a/ci/requirements-py27-cdat+pynio.yml b/ci/requirements-py27-cdat+iris+pynio.yml similarity index 89% rename from ci/requirements-py27-cdat+pynio.yml rename to ci/requirements-py27-cdat+iris+pynio.yml index feedb684cc8..75c5c4e6d7e 100644 --- a/ci/requirements-py27-cdat+pynio.yml +++ b/ci/requirements-py27-cdat+iris+pynio.yml @@ -2,6 +2,7 @@ name: test_env channels: - ajdawson # cdat - dbrown # pynio + - scitools # iris dependencies: - python=2.7 - cdat-lite @@ -11,6 +12,7 @@ dependencies: - pandas>=0.15.0 - pynio - scipy + - iris - pip: - coveralls - pytest-cov diff --git a/xarray/convert.py b/xarray/convert.py index 5c4624f2d01..b7bef8ce6b8 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -3,24 +3,36 @@ import numpy as np from .core.dataarray import DataArray +from .core.pycompat import OrderedDict from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -ignored_attrs = set(['name', 'tileIndex']) +cdms2_ignored_attrs = {'name', 'tileIndex'} +iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue'} + + +def encode(var): + return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) + + +def filter_attrs(_attrs, ignored_attrs): + return dict((k, v) for k, v in _attrs.items() if k not in ignored_attrs) def from_cdms2(variable): """Convert a cdms2 variable into an DataArray """ - def get_cdms2_attrs(var): - return dict((k, v) for k, v in var.attributes.items() - if k not in ignored_attrs) - values = np.asarray(variable) name = variable.id - coords = [(v.id, np.asarray(v), get_cdms2_attrs(v)) + coords = [(v.id, np.asarray(v), + filter_attrs(v.attributes, cdms2_ignored_attrs)) for v in variable.getAxisList()] - attrs = get_cdms2_attrs(variable) + attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs) dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) return decode_cf(dataarray.to_dataset())[dataarray.name] @@ -31,12 +43,9 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 - def encode(var): - return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) - - def set_cdms2_attrs(var, attrs): + def set_cdms2_attrs(_var, attrs): for k, v in attrs.items(): - setattr(var, k, v) + setattr(_var, k, v) axes = [] for dim in dataarray.dims: @@ -49,3 +58,81 @@ def set_cdms2_attrs(var, attrs): cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name) set_cdms2_attrs(cdms2_var, var.attrs) return cdms2_var + + +# TODO: Add converting bounds from xarray to Iris and back +# TODO: Cell methods are not converted between Iris and xarray +def to_iris(dataarray): + """Convert a DataArray into a Iris Cube + """ + # Iris not a hard dependency + import iris + # iris.unit is deprecated in Iris v1.9 + import cf_units + + def check_attrs(attrs, keys): + return dict((k, v) for k, v in attrs.items() if k in keys) + + def get_args(attrs): + _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} + _args.update(check_attrs(attrs, ('standard_name', 'long_name',))) + _unit_args = check_attrs(coord.attrs, ('calendar',)) + if attrs.has_key('units'): + _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) + return _args + + dim_coords = [] + aux_coords = [] + + for coord_name in dataarray.coords: + coord = encode(dataarray.coords[coord_name]) + coord_args = get_args(coord.attrs) + coord_args['var_name'] = coord_name + iris_coord = iris.coords.DimCoord(coord.values, **coord_args) + axis = None + if coord.dims: + axis = dataarray.get_axis_num(coord.dims) + if coord_name in dataarray.dims: + dim_coords.append((iris_coord, axis)) + else: + aux_coords.append((iris_coord, axis)) + + args = get_args(dataarray.attrs) + args['var_name'] = dataarray.name + args['dim_coords_and_dims'] = dim_coords + args['aux_coords_and_dims'] = aux_coords + + cube = iris.cube.Cube(dataarray.to_masked_array(), **args) + return cube + + +def from_iris(cube): + """Convert a Iris cube into an DataArray + """ + def get_attr(_obj): + attrs = {'standard_name': _obj.standard_name, + 'long_name': _obj.long_name} + if _obj.units.calendar: + attrs['calendar'] = _obj.units.calendar + if _obj.units.origin != '1': + attrs['units'] = _obj.units.origin + attrs.update(_obj.attributes) + return dict((k, v) for k, v in attrs.items() if v is not None) + + name = cube.var_name + dims = [dim.var_name for dim in cube.dim_coords] + coords = OrderedDict() + + for coord in cube.coords(): + coord_attrs = get_attr(coord) + coord_dims = [cube.coords()[i].var_name for i in cube.coord_dims(coord)] + if coord_dims: + coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) + else: + coords[coord.var_name] = ((), + np.asscalar(coord.points), coord_attrs) + + array_attrs = get_attr(cube) + dataarray = DataArray(cube.data, coords=coords, name=name, + attrs=array_attrs, dims=dims) + return decode_cf(dataarray.to_dataset())[dataarray.name] \ No newline at end of file diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index cfeda17eb91..9d8c4601a67 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1099,6 +1099,19 @@ def from_cdms2(cls, variable): from ..convert import from_cdms2 return from_cdms2(variable) + def to_iris(self): + """Convert this array into a iris.cube.Cube + """ + from ..convert import to_iris + return to_iris(self) + + @classmethod + def from_iris(cls, cube): + """Convert a iris.cube.Cube into an xarray.DataArray + """ + from ..convert import from_iris + return from_iris(cube) + def _all_compat(self, other, compat_str): """Helper function for equals and identical""" def compat(x, y): diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 7c5081c92ac..3d0531c61d3 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1639,6 +1639,50 @@ def test_to_and_from_cdms2(self): roundtripped = DataArray.from_cdms2(actual) self.assertDataArrayIdentical(original, roundtripped) + def test_to_and_from_iris(self): + try: + import iris + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1]) + + original = DataArray(np.arange(6).reshape(2, 3), coord_dict, + name='Temperature', attrs={'baz': 123, + 'units': 'Kelvin', + 'standard_name': + 'fire_temperature', + 'long_name': + 'Fire Temperature'}, + dims=('distance', 'time')) + + expected_coords = [Coordinate('distance', [-2, 2]), + Coordinate('time', [0, 1, 2]), + Coordinate('height', [10]), + Coordinate('distance2', [0, 1])] + + actual = original.to_iris() + self.assertArrayEqual(actual.data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + + for coord, expected_coord in zip((actual.coords()), expected_coords): + self.assertEqual(coord.var_name, expected_coord.name) + self.assertArrayEqual(coord.points, expected_coord.values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with self.assertRaisesRegexp(ValueError, 'unable to convert unnamed'): From e0498c525ba8da3eb66935b6cdff1a20919d22c3 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Fri, 1 Apr 2016 19:53:30 +0100 Subject: [PATCH 02/30] Update tests to use original.coords and add extra tests for coord attributes --- xarray/test/test_dataarray.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 3d0531c61d3..e2e94285d04 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -8,6 +8,7 @@ Coordinate, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import _full_like +from xarray.conventions import maybe_encode_datetime from . import (TestCase, ReturnItem, source_ndarray, unittest, requires_dask, requires_bottleneck) @@ -1621,15 +1622,15 @@ def test_to_and_from_cdms2(self): [('distance', [-2, 2], {'units': 'meters'}), ('time', pd.date_range('2000-01-01', periods=3))], name='foo', attrs={'baz': 123}) - expected_coords = [Coordinate('distance', [-2, 2]), - Coordinate('time', [0, 1, 2])] + actual = original.to_cdms2() self.assertArrayEqual(actual, original) self.assertEqual(actual.id, original.name) self.assertItemsEqual(actual.getAxisIds(), original.dims) - for axis, coord in zip(actual.getAxisList(), expected_coords): + for axis, coord_key in zip(actual.getAxisList(), original.coords): + coord = original.coords[coord_key] self.assertEqual(axis.id, coord.name) - self.assertArrayEqual(axis, coord.values) + self.assertArrayEqual(axis, maybe_encode_datetime(coord).values) self.assertEqual(actual.baz, original.attrs['baz']) component_times = actual.getAxis(1).asComponentTime() @@ -1642,6 +1643,7 @@ def test_to_and_from_cdms2(self): def test_to_and_from_iris(self): try: import iris + import cf_units except ImportError: raise unittest.SkipTest('iris not installed') @@ -1649,7 +1651,7 @@ def test_to_and_from_iris(self): coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) coord_dict['height'] = 10 - coord_dict['distance2'] = ('distance', [0, 1]) + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) original = DataArray(np.arange(6).reshape(2, 3), coord_dict, name='Temperature', attrs={'baz': 123, @@ -1660,23 +1662,25 @@ def test_to_and_from_iris(self): 'Fire Temperature'}, dims=('distance', 'time')) - expected_coords = [Coordinate('distance', [-2, 2]), - Coordinate('time', [0, 1, 2]), - Coordinate('height', [10]), - Coordinate('distance2', [0, 1])] - actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) self.assertEqual(actual.var_name, original.name) self.assertItemsEqual([d.var_name for d in actual.dim_coords], original.dims) - for coord, expected_coord in zip((actual.coords()), expected_coords): - self.assertEqual(coord.var_name, expected_coord.name) - self.assertArrayEqual(coord.points, expected_coord.values) + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) self.assertEqual(actual.coord_dims(coord), original.get_axis_num (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) self.assertEqual(actual.attributes['baz'], original.attrs['baz']) self.assertEqual(actual.standard_name, original.attrs['standard_name']) From c26184507a25c1bdb93e423f1db4f7d61da69fa3 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Sat, 2 Apr 2016 18:40:20 +0100 Subject: [PATCH 03/30] Remove set literals just in case. Replace has_key with in. Use AuxCoord and DimCoord correctly so 2d coords will work. Use dims variable to convert dimension numbers into names. Add 2d coord to test DataArray. --- xarray/convert.py | 24 +++++++++++++----------- xarray/test/test_dataarray.py | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index b7bef8ce6b8..7cb7ca56416 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -7,13 +7,14 @@ from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -cdms2_ignored_attrs = {'name', 'tileIndex'} -iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', - 'calendar', 'leap_month', 'leap_year', 'month_lengths', - 'coordinates', 'grid_mapping', 'climatology', - 'cell_methods', 'formula_terms', 'compress', - 'missing_value', 'add_offset', 'scale_factor', - 'valid_max', 'valid_min', 'valid_range', '_FillValue'} +cdms2_ignored_attrs = set(['name', 'tileIndex']) +iris_forbidden_keys = set( + ['standard_name', 'long_name', 'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue']) def encode(var): @@ -77,7 +78,7 @@ def get_args(attrs): _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} _args.update(check_attrs(attrs, ('standard_name', 'long_name',))) _unit_args = check_attrs(coord.attrs, ('calendar',)) - if attrs.has_key('units'): + if 'units' in attrs: _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args @@ -88,13 +89,14 @@ def get_args(attrs): coord = encode(dataarray.coords[coord_name]) coord_args = get_args(coord.attrs) coord_args['var_name'] = coord_name - iris_coord = iris.coords.DimCoord(coord.values, **coord_args) axis = None if coord.dims: axis = dataarray.get_axis_num(coord.dims) if coord_name in dataarray.dims: + iris_coord = iris.coords.DimCoord(coord.values, **coord_args) dim_coords.append((iris_coord, axis)) else: + iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) args = get_args(dataarray.attrs) @@ -125,7 +127,7 @@ def get_attr(_obj): for coord in cube.coords(): coord_attrs = get_attr(coord) - coord_dims = [cube.coords()[i].var_name for i in cube.coord_dims(coord)] + coord_dims = [dims[i] for i in cube.coord_dims(coord)] if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: @@ -135,4 +137,4 @@ def get_attr(_obj): array_attrs = get_attr(cube) dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) - return decode_cf(dataarray.to_dataset())[dataarray.name] \ No newline at end of file + return decode_cf(dataarray.to_dataset())[dataarray.name] diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index e2e94285d04..1c0e8b08933 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1652,6 +1652,7 @@ def test_to_and_from_iris(self): coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) coord_dict['height'] = 10 coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) original = DataArray(np.arange(6).reshape(2, 3), coord_dict, name='Temperature', attrs={'baz': 123, From edae053a04fa4d1642dc7e210e9eb6595d5ec1dc Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Mon, 4 Apr 2016 10:10:46 +0100 Subject: [PATCH 04/30] Create dimensions if the Iris cube does not have any --- xarray/convert.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/convert.py b/xarray/convert.py index 7cb7ca56416..2cc46ed2210 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -123,6 +123,8 @@ def get_attr(_obj): name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] + if not dims: + dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): From cd92bca6e228c44e14d6e0b090240f0c5eef96de Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Fri, 8 Apr 2016 15:41:30 +0100 Subject: [PATCH 05/30] Add code to convert cell_methods --- xarray/convert.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/xarray/convert.py b/xarray/convert.py index 2cc46ed2210..a4963a114b3 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -15,6 +15,9 @@ 'cell_methods', 'formula_terms', 'compress', 'missing_value', 'add_offset', 'scale_factor', 'valid_max', 'valid_min', 'valid_range', '_FillValue']) +cell_methods_strings = set(['point', 'sum', 'maximum', 'median', 'mid_range', + 'minimum', 'mean', 'mode', 'standard_deviation', + 'variance']) def encode(var): @@ -62,7 +65,6 @@ def set_cdms2_attrs(_var, attrs): # TODO: Add converting bounds from xarray to Iris and back -# TODO: Cell methods are not converted between Iris and xarray def to_iris(dataarray): """Convert a DataArray into a Iris Cube """ @@ -82,6 +84,45 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args + def get_cell_methods(cell_methods_str): + """Converts string to iris cell method objects""" + cell_methods = [] + _cell_method_words = [w.strip() for w in cell_methods_str.split(':')] + cm = {'coords': [], 'method': '', 'interval': [], 'comment': []} + skip = False + for i, word in enumerate(_cell_method_words): + # If this value is a comment or an interval don't read + if skip: + skip = False + continue + # If this word is an axis + if word not in cell_methods_strings | set(['interval', 'comment']): + # If we already have a method this must be the next cell_method + if cm['method']: + cell_methods.append( + iris.coords.CellMethod(cm['method'], + coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) + cm = {'coords': [], 'method': '', 'interval': [], + 'comment': []} + cm['coords'].append(word) + continue + else: + cm['coords'].append(word) + elif word in ['interval', 'comment']: + cm[word].append(_cell_method_words[i + 1]) + skip = True + continue + else: + cm['method'] = word + else: + cell_methods.append( + iris.coords.CellMethod(cm['method'], coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) + return cell_methods + dim_coords = [] aux_coords = [] @@ -103,6 +144,8 @@ def get_args(attrs): args['var_name'] = dataarray.name args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords + if 'cell_methods' in dataarray.attrs: + args['cell_methods'] = get_cell_methods(dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube @@ -111,6 +154,7 @@ def get_args(attrs): def from_iris(cube): """Convert a Iris cube into an DataArray """ + def get_attr(_obj): attrs = {'standard_name': _obj.standard_name, 'long_name': _obj.long_name} @@ -121,6 +165,20 @@ def get_attr(_obj): attrs.update(_obj.attributes) return dict((k, v) for k, v in attrs.items() if v is not None) + def get_cell_methods(cell_methods_obj): + _cell_methods = [] + for cell_method in cell_methods_obj: + names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) + intervals = ' '.join(['interval: {}'.format(interval) + for interval in cell_method.intervals]) + comments = ' '.join(['comment: {}'.format(comment) + for comment in cell_method.comments]) + extra = ' '.join([intervals, comments]).strip() + if extra: + extra += ' ' + _cell_methods.append(names + cell_method.method + extra) + return ' '.join(_cell_methods) + name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] if not dims: @@ -137,6 +195,9 @@ def get_attr(_obj): np.asscalar(coord.points), coord_attrs) array_attrs = get_attr(cube) + cell_methods = get_cell_methods(cube.cell_methods) + if cell_methods: + array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) return decode_cf(dataarray.to_dataset())[dataarray.name] From 44930af72964b78c64d14b198b930c8d6a759a78 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Mon, 11 Apr 2016 10:25:58 +0100 Subject: [PATCH 06/30] Don't append blank cell method --- xarray/convert.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index a4963a114b3..c08d1082da1 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -117,10 +117,11 @@ def get_cell_methods(cell_methods_str): else: cm['method'] = word else: - cell_methods.append( - iris.coords.CellMethod(cm['method'], coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) + if cm['method']: + cell_methods.append( + iris.coords.CellMethod(cm['method'], coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) return cell_methods dim_coords = [] From 6bed3062e37c8b1a4e24014913f757e025a37688 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 20 Apr 2016 14:01:45 +0100 Subject: [PATCH 07/30] Update cell method code to use internal Iris functions. Also add tests. --- xarray/convert.py | 47 +++++------------------------------ xarray/test/test_dataarray.py | 6 +++++ 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index c08d1082da1..190dfd8e5ac 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -70,6 +70,8 @@ def to_iris(dataarray): """ # Iris not a hard dependency import iris + import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ + as iris_fc_rules_cf_fc # iris.unit is deprecated in Iris v1.9 import cf_units @@ -84,45 +86,7 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args - def get_cell_methods(cell_methods_str): - """Converts string to iris cell method objects""" - cell_methods = [] - _cell_method_words = [w.strip() for w in cell_methods_str.split(':')] - cm = {'coords': [], 'method': '', 'interval': [], 'comment': []} - skip = False - for i, word in enumerate(_cell_method_words): - # If this value is a comment or an interval don't read - if skip: - skip = False - continue - # If this word is an axis - if word not in cell_methods_strings | set(['interval', 'comment']): - # If we already have a method this must be the next cell_method - if cm['method']: - cell_methods.append( - iris.coords.CellMethod(cm['method'], - coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) - cm = {'coords': [], 'method': '', 'interval': [], - 'comment': []} - cm['coords'].append(word) - continue - else: - cm['coords'].append(word) - elif word in ['interval', 'comment']: - cm[word].append(_cell_method_words[i + 1]) - skip = True - continue - else: - cm['method'] = word - else: - if cm['method']: - cell_methods.append( - iris.coords.CellMethod(cm['method'], coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) - return cell_methods + get_cell_methods = iris_fc_rules_cf_fc._parse_cell_methods dim_coords = [] aux_coords = [] @@ -146,7 +110,8 @@ def get_cell_methods(cell_methods_str): args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = get_cell_methods(dataarray.attrs['cell_methods']) + args['cell_methods'] = get_cell_methods(dataarray.name, + dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube @@ -176,7 +141,7 @@ def get_cell_methods(cell_methods_obj): for comment in cell_method.comments]) extra = ' '.join([intervals, comments]).strip() if extra: - extra += ' ' + extra = ' ({})'.format(extra) _cell_methods.append(names + cell_method.method + extra) return ' '.join(_cell_methods) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 1c0e8b08933..30bcfaaed1f 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1663,11 +1663,17 @@ def test_to_and_from_iris(self): 'Fire Temperature'}, dims=('distance', 'time')) + original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) self.assertEqual(actual.var_name, original.name) self.assertItemsEqual([d.var_name for d in actual.dim_coords], original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),)) for coord, orginal_key in zip((actual.coords()), original.coords): original_coord = original.coords[orginal_key] From cd06a2e23e77fff6c96ad00c6285a5966bc63b1f Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Thu, 21 Apr 2016 19:39:31 +0100 Subject: [PATCH 08/30] Update the API for IRIS change --- xarray/convert.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 190dfd8e5ac..fd97350d236 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -70,8 +70,13 @@ def to_iris(dataarray): """ # Iris not a hard dependency import iris - import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ - as iris_fc_rules_cf_fc + try: + from iris.fileformats.netcdf import parse_cell_methods + except ImportError: + # prior to v1.10 + from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ + import _parse_cell_methods as parse_cell_methods + # iris.unit is deprecated in Iris v1.9 import cf_units @@ -86,8 +91,6 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args - get_cell_methods = iris_fc_rules_cf_fc._parse_cell_methods - dim_coords = [] aux_coords = [] @@ -110,8 +113,8 @@ def get_args(attrs): args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = get_cell_methods(dataarray.name, - dataarray.attrs['cell_methods']) + args['cell_methods'] = \ + parse_cell_methods(dataarray.name, dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube From e7f9cb1028ab5ffaf2b0c7de5093a673f5b8a311 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 25 May 2016 12:00:43 +0100 Subject: [PATCH 09/30] Move helper functions outside of main functions --- xarray/convert.py | 118 +++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 53 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index fd97350d236..15efa6eabbe 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -24,8 +24,10 @@ def encode(var): return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) -def filter_attrs(_attrs, ignored_attrs): - return dict((k, v) for k, v in _attrs.items() if k not in ignored_attrs) +def _filter_attrs(attrs, ignored_attrs): + """ Return attrs that are not in ignored_attrs + """ + return dict((k, v) for k, v in attrs.items() if k not in ignored_attrs) def from_cdms2(variable): @@ -34,9 +36,9 @@ def from_cdms2(variable): values = np.asarray(variable) name = variable.id coords = [(v.id, np.asarray(v), - filter_attrs(v.attributes, cdms2_ignored_attrs)) + _filter_attrs(v.attributes, cdms2_ignored_attrs)) for v in variable.getAxisList()] - attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs) + attrs = _filter_attrs(variable.attributes, cdms2_ignored_attrs) dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) return decode_cf(dataarray.to_dataset())[dataarray.name] @@ -47,9 +49,9 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 - def set_cdms2_attrs(_var, attrs): + def set_cdms2_attrs(var, attrs): for k, v in attrs.items(): - setattr(_var, k, v) + setattr(var, k, v) axes = [] for dim in dataarray.dims: @@ -64,9 +66,28 @@ def set_cdms2_attrs(_var, attrs): return cdms2_var +def _pick_attrs(attrs, keys): + """ Return attrs with keys in keys list + """ + return dict((k, v) for k, v in attrs.items() if k in keys) + + +def _get_iris_args(attrs): + """ Converts the xarray attrs into args that can be passed into Iris + """ + # iris.unit is deprecated in Iris v1.9 + import cf_units + args = {'attributes': _filter_attrs(attrs, iris_forbidden_keys)} + args.update(_pick_attrs(attrs, ('standard_name', 'long_name',))) + unit_args = _pick_attrs(attrs, ('calendar',)) + if 'units' in attrs: + args['units'] = cf_units.Unit(attrs['units'], **unit_args) + return args + + # TODO: Add converting bounds from xarray to Iris and back def to_iris(dataarray): - """Convert a DataArray into a Iris Cube + """ Convert a DataArray into a Iris Cube """ # Iris not a hard dependency import iris @@ -77,26 +98,12 @@ def to_iris(dataarray): from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ import _parse_cell_methods as parse_cell_methods - # iris.unit is deprecated in Iris v1.9 - import cf_units - - def check_attrs(attrs, keys): - return dict((k, v) for k, v in attrs.items() if k in keys) - - def get_args(attrs): - _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} - _args.update(check_attrs(attrs, ('standard_name', 'long_name',))) - _unit_args = check_attrs(coord.attrs, ('calendar',)) - if 'units' in attrs: - _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) - return _args - dim_coords = [] aux_coords = [] for coord_name in dataarray.coords: coord = encode(dataarray.coords[coord_name]) - coord_args = get_args(coord.attrs) + coord_args = _get_iris_args(coord.attrs) coord_args['var_name'] = coord_name axis = None if coord.dims: @@ -108,46 +115,51 @@ def get_args(attrs): iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) - args = get_args(dataarray.attrs) + args = _get_iris_args(dataarray.attrs) args['var_name'] = dataarray.name args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = \ - parse_cell_methods(dataarray.name, dataarray.attrs['cell_methods']) + args['cell_methods'] = parse_cell_methods( + dataarray.name, dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube -def from_iris(cube): - """Convert a Iris cube into an DataArray +def _iris_obj_to_attrs(obj): + """ Return a dictionary of attrs when given a Iris object + """ + attrs = {'standard_name': obj.standard_name, + 'long_name': obj.long_name} + if obj.units.calendar: + attrs['calendar'] = obj.units.calendar + if obj.units.origin != '1': + attrs['units'] = obj.units.origin + attrs.update(obj.attributes) + return dict((k, v) for k, v in attrs.items() if v is not None) + + +def _iris_cell_methods_to_str(cell_methods_obj): + """ Converts a Iris cell methods into a string """ + cell_methods = [] + for cell_method in cell_methods_obj: + names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) + intervals = ' '.join(['interval: {}'.format(interval) + for interval in cell_method.intervals]) + comments = ' '.join(['comment: {}'.format(comment) + for comment in cell_method.comments]) + extra = ' '.join([intervals, comments]).strip() + if extra: + extra = ' ({})'.format(extra) + cell_methods.append(names + cell_method.method + extra) + return ' '.join(cell_methods) - def get_attr(_obj): - attrs = {'standard_name': _obj.standard_name, - 'long_name': _obj.long_name} - if _obj.units.calendar: - attrs['calendar'] = _obj.units.calendar - if _obj.units.origin != '1': - attrs['units'] = _obj.units.origin - attrs.update(_obj.attributes) - return dict((k, v) for k, v in attrs.items() if v is not None) - - def get_cell_methods(cell_methods_obj): - _cell_methods = [] - for cell_method in cell_methods_obj: - names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) - intervals = ' '.join(['interval: {}'.format(interval) - for interval in cell_method.intervals]) - comments = ' '.join(['comment: {}'.format(comment) - for comment in cell_method.comments]) - extra = ' '.join([intervals, comments]).strip() - if extra: - extra = ' ({})'.format(extra) - _cell_methods.append(names + cell_method.method + extra) - return ' '.join(_cell_methods) +def from_iris(cube): + """ Convert a Iris cube into an DataArray + """ name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] if not dims: @@ -155,7 +167,7 @@ def get_cell_methods(cell_methods_obj): coords = OrderedDict() for coord in cube.coords(): - coord_attrs = get_attr(coord) + coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) @@ -163,8 +175,8 @@ def get_cell_methods(cell_methods_obj): coords[coord.var_name] = ((), np.asscalar(coord.points), coord_attrs) - array_attrs = get_attr(cube) - cell_methods = get_cell_methods(cube.cell_methods) + array_attrs = _iris_obj_to_attrs(cube) + cell_methods = _iris_cell_methods_to_str(cube.cell_methods) if cell_methods: array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, From 877d06f17379f2879f334304cdc62328f684d900 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Tue, 9 Aug 2016 15:49:01 +0100 Subject: [PATCH 10/30] Update to build dims with mix of Dimension and Auxiliary coordinates --- xarray/convert.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 15efa6eabbe..75e672b3ba0 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -160,10 +160,20 @@ def _iris_cell_methods_to_str(cell_methods_obj): def from_iris(cube): """ Convert a Iris cube into an DataArray """ + import iris.exceptions name = cube.var_name - dims = [dim.var_name for dim in cube.dim_coords] - if not dims: - dims = ["dim{}".format(i) for i in range(cube.data.ndim)] + dims = [] + for dim in xrange(cube.ndim): + try: + dim_coord = cube.coord(dim_coords=True, dimensions=(dim,)) + dims.append(dim_coord.var_name) + except iris.exceptions.CoordinateNotFoundError: + index_coord = range(cube.shape[dim]) + dims.append("dim{}".format(index_coord)) + + # dims = [dim.var_name for dim in cube.dim_coords] + # if not dims: + # dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): From e42aeb2adacc0d3be9e93c94f07b2dde6bae3429 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Tue, 9 Aug 2016 15:52:49 +0100 Subject: [PATCH 11/30] Fix import after merge --- xarray/test/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index e873280721b..adca0f47a87 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -10,7 +10,7 @@ Coordinate, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import _full_like - +from xarray.conventions import maybe_encode_datetime from xarray.test import (TestCase, ReturnItem, source_ndarray, unittest, requires_dask, requires_bottleneck) From 338ef6b7242c06d5107d2224c1df4cdb2a4b951c Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 10 Aug 2016 19:56:34 +0100 Subject: [PATCH 12/30] Bug fix / refactoring --- xarray/convert.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 75e672b3ba0..a09497a11fd 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -163,13 +163,12 @@ def from_iris(cube): import iris.exceptions name = cube.var_name dims = [] - for dim in xrange(cube.ndim): + for i in xrange(cube.ndim): try: - dim_coord = cube.coord(dim_coords=True, dimensions=(dim,)) + dim_coord = cube.coord(dim_coords=True, dimensions=(i,)) dims.append(dim_coord.var_name) except iris.exceptions.CoordinateNotFoundError: - index_coord = range(cube.shape[dim]) - dims.append("dim{}".format(index_coord)) + dims.append("dim_{}".format(i)) # dims = [dim.var_name for dim in cube.dim_coords] # if not dims: From 46f68ff315064e600e5da1aa10252ea227012e54 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Thu, 11 Aug 2016 14:19:06 +0100 Subject: [PATCH 13/30] Change the dencode_cf method and raise error if coord has no var_name --- xarray/convert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index a09497a11fd..d06f2836821 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -170,14 +170,13 @@ def from_iris(cube): except iris.exceptions.CoordinateNotFoundError: dims.append("dim_{}".format(i)) - # dims = [dim.var_name for dim in cube.dim_coords] - # if not dims: - # dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] + if not coord.var_name: + raise ValueError('Coordinate has no var_name') if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: @@ -190,4 +189,5 @@ def from_iris(cube): array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) - return decode_cf(dataarray.to_dataset())[dataarray.name] + decoded_ds = decode_cf(dataarray._to_temp_dataset()) + return dataarray._from_temp_dataset(decoded_ds) From edcb3fdb3442f67e6d330be4126361b73eb76516 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Thu, 30 Nov 2017 21:52:12 +0000 Subject: [PATCH 14/30] Add missing test and two minor fixes --- xarray/convert.py | 7 ++--- xarray/tests/test_dataarray.py | 56 +++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 0c8bf8cd2a5..53d8e1c5ce5 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -7,7 +7,7 @@ import numpy as np from .core.dataarray import DataArray -from .core.pycompat import OrderedDict +from .core.pycompat import OrderedDict, range from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) @@ -124,8 +124,7 @@ def to_iris(dataarray): args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = parse_cell_methods( - dataarray.name, dataarray.attrs['cell_methods']) + args['cell_methods'] = parse_cell_methods(dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube @@ -167,7 +166,7 @@ def from_iris(cube): import iris.exceptions name = cube.var_name dims = [] - for i in xrange(cube.ndim): + for i in range(cube.ndim): try: dim_coord = cube.coord(dim_coords=True, dimensions=(i,)) dims.append(dim_coord.var_name) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c86f706f2ce..9323de63fa7 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -15,7 +15,7 @@ IndexVariable, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import full_like - +from xarray.conventions import maybe_encode_datetime from xarray.tests import ( TestCase, ReturnItem, source_ndarray, unittest, requires_dask, assert_identical, assert_equal, assert_allclose, assert_array_equal, @@ -2724,6 +2724,60 @@ def test_to_and_from_cdms2(self): roundtripped = DataArray.from_cdms2(actual) self.assertDataArrayIdentical(original, roundtripped) + def test_to_and_from_iris(self): + try: + import iris + import cf_units + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) + + original = DataArray(np.arange(6).reshape(2, 3), coord_dict, + name='Temperature', attrs={'baz': 123, + 'units': 'Kelvin', + 'standard_name': + 'fire_temperature', + 'long_name': + 'Fire Temperature'}, + dims=('distance', 'time')) + + original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' + actual = original.to_iris() + self.assertArrayEqual(actual.data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),)) + + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with raises_regex(ValueError, 'unable to convert unnamed'): From fb724daf2e909b6fbb84f35413f4079c605db211 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Thu, 30 Nov 2017 21:52:50 +0000 Subject: [PATCH 15/30] Replacing function calls with set literals --- xarray/convert.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 53d8e1c5ce5..a0d4075c999 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -11,17 +11,13 @@ from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -cdms2_ignored_attrs = set(['name', 'tileIndex']) -iris_forbidden_keys = set( - ['standard_name', 'long_name', 'units', 'bounds', 'axis', - 'calendar', 'leap_month', 'leap_year', 'month_lengths', - 'coordinates', 'grid_mapping', 'climatology', - 'cell_methods', 'formula_terms', 'compress', - 'missing_value', 'add_offset', 'scale_factor', - 'valid_max', 'valid_min', 'valid_range', '_FillValue']) -cell_methods_strings = set(['point', 'sum', 'maximum', 'median', 'mid_range', - 'minimum', 'mean', 'mode', 'standard_deviation', - 'variance']) +cdms2_ignored_attrs = {'name', 'tileIndex'} +iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', 'calendar', 'leap_month', 'leap_year', + 'month_lengths', 'coordinates', 'grid_mapping', 'climatology', 'cell_methods', 'formula_terms', + 'compress', 'missing_value', 'add_offset', 'scale_factor', 'valid_max', 'valid_min', + 'valid_range', '_FillValue'} +cell_methods_strings = {'point', 'sum', 'maximum', 'median', 'mid_range', 'minimum', 'mean', 'mode', + 'standard_deviation', 'variance'} def encode(var): From d510244fb57424acebdb0fa165582079a94021bc Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Thu, 30 Nov 2017 22:04:03 +0000 Subject: [PATCH 16/30] Adding what's new and updating api.rst --- doc/api.rst | 1 + doc/whats-new.rst | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 7bcb844783a..234221704fe 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -446,6 +446,7 @@ DataArray methods DataArray.to_index DataArray.to_masked_array DataArray.to_cdms2 + DataArray.to_iris DataArray.to_dict DataArray.from_series DataArray.from_cdms2 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 899175af45f..82fe77fbd1f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,11 @@ Enhancements - :py:func:`~plot.contourf()` learned to contour 2D variables that have both a 1D co-ordinate (e.g. time) and a 2D co-ordinate (e.g. depth as a function of time). By `Deepak Cherian `_. +- Added :py:meth:`DataArray.to_iris ` for + converting a data array into an Iris_ Cube with the same data and coordinates (:issue:`621` and :issue:`37`). + By `Neil Parley `_. +.. _Iris: http://scitools.org.uk/iris Bug fixes ~~~~~~~~~ From 208d129d917837d7fac161fd07a21006dbd36a8e Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Fri, 1 Dec 2017 08:19:28 +0000 Subject: [PATCH 17/30] Adding from_cube method to docs --- doc/api.rst | 1 + doc/whats-new.rst | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 234221704fe..719ca166b8e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -447,6 +447,7 @@ DataArray methods DataArray.to_masked_array DataArray.to_cdms2 DataArray.to_iris + DataArray.from_iris DataArray.to_dict DataArray.from_series DataArray.from_cdms2 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 82fe77fbd1f..7fdb22de4d9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,8 +23,8 @@ Enhancements - :py:func:`~plot.contourf()` learned to contour 2D variables that have both a 1D co-ordinate (e.g. time) and a 2D co-ordinate (e.g. depth as a function of time). By `Deepak Cherian `_. -- Added :py:meth:`DataArray.to_iris ` for - converting a data array into an Iris_ Cube with the same data and coordinates (:issue:`621` and :issue:`37`). +- Added :py:meth:`DataArray.to_iris ` and :py:meth:`DataArray.from_iris ` for + converting data arrays to and from Iris_ Cubes with the same data and coordinates (:issue:`621` and :issue:`37`). By `Neil Parley `_. .. _Iris: http://scitools.org.uk/iris From 5d933a8119d6cdd7a9635148ce3b17e1adf19b4d Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Tue, 5 Dec 2017 17:23:34 +0000 Subject: [PATCH 18/30] Make Iris dependency >=1.10 so we can rely on the newer parse_cell_methods interface (this is already pretty old now) --- ci/requirements-py27-cdat+iris+pynio.yml | 2 +- xarray/convert.py | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/ci/requirements-py27-cdat+iris+pynio.yml b/ci/requirements-py27-cdat+iris+pynio.yml index 1063eb2b05d..23544c47d22 100644 --- a/ci/requirements-py27-cdat+iris+pynio.yml +++ b/ci/requirements-py27-cdat+iris+pynio.yml @@ -22,7 +22,7 @@ dependencies: - seaborn - toolz - rasterio - - iris + - iris>=1.10 - pip: - coveralls - pytest-cov diff --git a/xarray/convert.py b/xarray/convert.py index a0d4075c999..7190bee309c 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -91,12 +91,7 @@ def to_iris(dataarray): """ # Iris not a hard dependency import iris - try: - from iris.fileformats.netcdf import parse_cell_methods - except ImportError: - # prior to v1.10 - from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ - import _parse_cell_methods as parse_cell_methods + from iris.fileformats.netcdf import parse_cell_methods dim_coords = [] aux_coords = [] From 05760aa7ff3d2c75819f4f2e9b618b3ed2bd0c0c Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Tue, 5 Dec 2017 17:25:16 +0000 Subject: [PATCH 19/30] Adding Iris section to the I/O docs --- doc/io.rst | 28 ++++++++++++++++++++++++++++ doc/whats-new.rst | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 9e17ffc034f..e0ba87526a7 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -338,6 +338,34 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time difference between values, with a reference time of the first time value. +.. _io.iris: + +Iris +---- + +The Iris_ tool allows easy reading of common meteorological and climate model formats +(including GRIB and UK MetOffice PP files) into ``Cube``s which are in many ways very +similar to to ``DataArray``s, while enforcing a CF-compliant data model. If iris is +installaed xarray can convert a ``Cube`` into a ``DataArray`` using +:py:meth:`~xarray.Dataset.from_iris`: + +.. ipython:: python + + da_cube = xr.Dataset.from_iris(cube) + da_cube + + +Conversly, We can create a new cube object from a ``DataArray`` using +:py:meth:`~xarray.Dataset.from_dict`: + +.. ipython:: python + + cube = da.to_iris() + cube + +.. _Iris: http://scitools.org.uk/iris + + OPeNDAP ------- diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7fdb22de4d9..a1d373f031b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,7 +25,7 @@ Enhancements By `Deepak Cherian `_. - Added :py:meth:`DataArray.to_iris ` and :py:meth:`DataArray.from_iris ` for converting data arrays to and from Iris_ Cubes with the same data and coordinates (:issue:`621` and :issue:`37`). - By `Neil Parley `_. + By `Neil Parley `_ and `Duncan Watson-Parris `_. .. _Iris: http://scitools.org.uk/iris From 90ada4b00ff85a742c154c128ee016888cf86ee6 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Wed, 6 Dec 2017 09:39:44 +0000 Subject: [PATCH 20/30] Fixing typos --- doc/io.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index e0ba87526a7..1b30c69cc28 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -344,9 +344,9 @@ Iris ---- The Iris_ tool allows easy reading of common meteorological and climate model formats -(including GRIB and UK MetOffice PP files) into ``Cube``s which are in many ways very +(including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very similar to to ``DataArray``s, while enforcing a CF-compliant data model. If iris is -installaed xarray can convert a ``Cube`` into a ``DataArray`` using +installed xarray can convert a ``Cube`` into a ``DataArray`` using :py:meth:`~xarray.Dataset.from_iris`: .. ipython:: python @@ -355,8 +355,8 @@ installaed xarray can convert a ``Cube`` into a ``DataArray`` using da_cube -Conversly, We can create a new cube object from a ``DataArray`` using -:py:meth:`~xarray.Dataset.from_dict`: +Conversly, we can create a new cube object from a ``DataArray`` using +:py:meth:`~xarray.Dataset.to_iris`: .. ipython:: python From ebf38206d3518cc27c2403de652033c7f0115e25 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Wed, 6 Dec 2017 09:42:29 +0000 Subject: [PATCH 21/30] Improving error message --- xarray/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/convert.py b/xarray/convert.py index 7190bee309c..7ac57fe83c5 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -170,7 +170,7 @@ def from_iris(cube): coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] if not coord.var_name: - raise ValueError('Coordinate has no var_name') + raise ValueError("Coordinate '{}' has no var_name attribute".format(coord.name())) if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: From 014421521d824820e39f80b1b88dbd28c111683e Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Wed, 6 Dec 2017 10:31:56 +0000 Subject: [PATCH 22/30] Test edge cases --- xarray/tests/test_dataarray.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9323de63fa7..4a8d70acdb9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2778,6 +2778,14 @@ def test_to_and_from_iris(self): roundtripped = DataArray.from_iris(actual) self.assertDataArrayIdentical(original, roundtripped) + actual.remove_coord('time') + auto_time_dimension = DataArray.from_iris(actual) + self.assertEqual(auto_time_dimension.dims, ('distance', 'dim_1')) + + actual.coord('distance').var_name = None + with raises_regex(ValueError, 'no var_name attribute'): + DataArray.from_iris(actual) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with raises_regex(ValueError, 'unable to convert unnamed'): From 3a4fc6398b517d7078a32c05c00dc5f473c281b2 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Wed, 6 Dec 2017 10:38:57 +0000 Subject: [PATCH 23/30] Preserve dask data arrays if at all possible. Convert to (dask) masked arrays when going to Iris and filled (dask) arrays when coming from Iris. --- xarray/convert.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 7ac57fe83c5..930380dc74b 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -8,8 +8,10 @@ from .core.dataarray import DataArray from .core.pycompat import OrderedDict, range +from .core.dtypes import get_fill_value from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) +from dask.array import ma cdms2_ignored_attrs = {'name', 'tileIndex'} iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', 'calendar', 'leap_month', 'leap_year', @@ -117,7 +119,7 @@ def to_iris(dataarray): if 'cell_methods' in dataarray.attrs: args['cell_methods'] = parse_cell_methods(dataarray.attrs['cell_methods']) - cube = iris.cube.Cube(dataarray.to_masked_array(), **args) + cube = iris.cube.Cube(ma.masked_invalid(dataarray), **args) return cube @@ -181,7 +183,9 @@ def from_iris(cube): cell_methods = _iris_cell_methods_to_str(cube.cell_methods) if cell_methods: array_attrs['cell_methods'] = cell_methods - dataarray = DataArray(cube.data, coords=coords, name=name, + + cube_data = ma.filled(cube.core_data(), get_fill_value(cube.dtype)) if hasattr(cube, 'core_data') else cube.data + dataarray = DataArray(cube_data, coords=coords, name=name, attrs=array_attrs, dims=dims) decoded_ds = decode_cf(dataarray._to_temp_dataset()) return dataarray._from_temp_dataset(decoded_ds) From 95b0197ebb0b5258db8deee48b2657fab7d86ca4 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 11:16:06 +0000 Subject: [PATCH 24/30] Updates to remove the hard dependency on dask, and to test the conversion of dask arrays which may or may not be masked. --- xarray/convert.py | 40 ++++++++++++++--- xarray/tests/test_dataarray.py | 81 +++++++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 8 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 930380dc74b..e663d571b59 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -10,8 +10,7 @@ from .core.pycompat import OrderedDict, range from .core.dtypes import get_fill_value from .conventions import ( - maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -from dask.array import ma + maybe_encode_timedelta, maybe_encode_datetime, decode_cf, decode_cf_variable) cdms2_ignored_attrs = {'name', 'tileIndex'} iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', 'calendar', 'leap_month', 'leap_year', @@ -94,6 +93,11 @@ def to_iris(dataarray): # Iris not a hard dependency import iris from iris.fileformats.netcdf import parse_cell_methods + try: + from dask.array import ma as dask_ma + from dask.array import Array + except ImportError: + dask_ma = None dim_coords = [] aux_coords = [] @@ -119,7 +123,14 @@ def to_iris(dataarray): if 'cell_methods' in dataarray.attrs: args['cell_methods'] = parse_cell_methods(dataarray.attrs['cell_methods']) - cube = iris.cube.Cube(ma.masked_invalid(dataarray), **args) + # Create the right type of masked array (should be easier after #1769) + if isinstance(dataarray.data, Array): + masked_data = dask_ma.masked_invalid(dataarray) + else: + masked_data = np.ma.masked_invalid(dataarray) + + cube = iris.cube.Cube(masked_data, **args) + return cube @@ -157,6 +168,12 @@ def from_iris(cube): """ Convert a Iris cube into an DataArray """ import iris.exceptions + try: + from dask.array import ma as dask_ma + from dask.array import Array + except ImportError: + dask_ma = None + name = cube.var_name dims = [] for i in range(cube.ndim): @@ -176,16 +193,25 @@ def from_iris(cube): if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: - coords[coord.var_name] = ((), - np.asscalar(coord.points), coord_attrs) + coords[coord.var_name] = ((), np.asscalar(coord.points), coord_attrs) array_attrs = _iris_obj_to_attrs(cube) cell_methods = _iris_cell_methods_to_str(cube.cell_methods) if cell_methods: array_attrs['cell_methods'] = cell_methods - cube_data = ma.filled(cube.core_data(), get_fill_value(cube.dtype)) if hasattr(cube, 'core_data') else cube.data - dataarray = DataArray(cube_data, coords=coords, name=name, + # Deal with iris 1.* and 2.* + cube_data = cube.core_data() if hasattr(cube, 'core_data') else cube.data + + # Deal with dask and numpy masked arrays + if dask_ma and isinstance(cube_data, Array): + filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) + elif isinstance(cube_data, np.ma.MaskedArray): + filled_data = np.ma.filled(cube_data, get_fill_value(cube.dtype)) + else: + filled_data = cube_data + + dataarray = DataArray(filled_data, coords=coords, name=name, attrs=array_attrs, dims=dims) decoded_ds = decode_cf(dataarray._to_temp_dataset()) return dataarray._from_temp_dataset(decoded_ds) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4a8d70acdb9..86145bbe8b5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2738,7 +2738,7 @@ def test_to_and_from_iris(self): coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) - original = DataArray(np.arange(6).reshape(2, 3), coord_dict, + original = DataArray(np.arange(6, dtype='float').reshape(2, 3), coord_dict, name='Temperature', attrs={'baz': 123, 'units': 'Kelvin', 'standard_name': @@ -2747,6 +2747,9 @@ def test_to_and_from_iris(self): 'Fire Temperature'}, dims=('distance', 'time')) + # Set a bad value to test the masking logic + original.data[0, 2] = np.NaN + original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) @@ -2786,6 +2789,82 @@ def test_to_and_from_iris(self): with raises_regex(ValueError, 'no var_name attribute'): DataArray.from_iris(actual) + @requires_dask + def test_to_and_from_iris_dask(self): + import dask.array as da + try: + import iris + import cf_units + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) + + original = DataArray(da.from_array(np.arange(-1, 5, dtype='float').reshape(2, 3), 3), coord_dict, + name='Temperature', attrs={'baz': 123, + 'units': 'Kelvin', + 'standard_name': + 'fire_temperature', + 'long_name': + 'Fire Temperature'}, + dims=('distance', 'time')) + + # Set a bad value to test the masking logic + original.data = da.ma.masked_less(original.data, 0) + + original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' + actual = original.to_iris() + + # Be careful not to trigger the loading of the iris data + actual_data = actual.core_data() if hasattr(actual, 'core_data') else actual.data + self.assertArrayEqual(actual_data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),)) + + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + + # If the Iris version supports it then we should get a dask array back + if hasattr(actual, 'core_data'): + pass + # TODO This currently fails due to the decoding loading the data (#1372) + # self.assertEqual(type(original.data), type(roundtripped.data)) + + actual.remove_coord('time') + auto_time_dimension = DataArray.from_iris(actual) + self.assertEqual(auto_time_dimension.dims, ('distance', 'dim_1')) + + actual.coord('distance').var_name = None + with raises_regex(ValueError, 'no var_name attribute'): + DataArray.from_iris(actual) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with raises_regex(ValueError, 'unable to convert unnamed'): From 1470bc58b0921ed063a929ef39f716d631d0f10b Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 15:04:11 +0000 Subject: [PATCH 25/30] Minor doc fixes --- doc/io.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 7e8fb2fc1b0..9130bb7d645 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -345,11 +345,12 @@ Iris The Iris_ tool allows easy reading of common meteorological and climate model formats (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very -similar to to ``DataArray``s, while enforcing a CF-compliant data model. If iris is +similar to ``DataArray``s, while enforcing a CF-compliant data model. If iris is installed xarray can convert a ``Cube`` into a ``DataArray`` using :py:meth:`~xarray.Dataset.from_iris`: .. ipython:: python + :verbatim: da_cube = xr.Dataset.from_iris(cube) da_cube @@ -359,6 +360,7 @@ Conversly, we can create a new cube object from a ``DataArray`` using :py:meth:`~xarray.Dataset.to_iris`: .. ipython:: python + :verbatim: cube = da.to_iris() cube From b457c74f3db3f072993580cc6a3a9424d6933ee9 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 17:22:30 +0000 Subject: [PATCH 26/30] Minor typo --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 9130bb7d645..7a3e1938d6f 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -345,7 +345,7 @@ Iris The Iris_ tool allows easy reading of common meteorological and climate model formats (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very -similar to ``DataArray``s, while enforcing a CF-compliant data model. If iris is +similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is installed xarray can convert a ``Cube`` into a ``DataArray`` using :py:meth:`~xarray.Dataset.from_iris`: From 441a84f0744fe0bf51688b86cdd80e063204f138 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 17:26:24 +0000 Subject: [PATCH 27/30] Use dask_array_type for dask checks --- xarray/convert.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index e663d571b59..5cb3f47a050 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -93,11 +93,7 @@ def to_iris(dataarray): # Iris not a hard dependency import iris from iris.fileformats.netcdf import parse_cell_methods - try: - from dask.array import ma as dask_ma - from dask.array import Array - except ImportError: - dask_ma = None + from xarray.core.pycompat import dask_array_type dim_coords = [] aux_coords = [] @@ -124,7 +120,8 @@ def to_iris(dataarray): args['cell_methods'] = parse_cell_methods(dataarray.attrs['cell_methods']) # Create the right type of masked array (should be easier after #1769) - if isinstance(dataarray.data, Array): + if isinstance(dataarray.data, dask_array_type): + from dask.array import ma as dask_ma masked_data = dask_ma.masked_invalid(dataarray) else: masked_data = np.ma.masked_invalid(dataarray) @@ -168,11 +165,7 @@ def from_iris(cube): """ Convert a Iris cube into an DataArray """ import iris.exceptions - try: - from dask.array import ma as dask_ma - from dask.array import Array - except ImportError: - dask_ma = None + from xarray.core.pycompat import dask_array_type name = cube.var_name dims = [] @@ -204,7 +197,8 @@ def from_iris(cube): cube_data = cube.core_data() if hasattr(cube, 'core_data') else cube.data # Deal with dask and numpy masked arrays - if dask_ma and isinstance(cube_data, Array): + if isinstance(cube_data, dask_array_type): + from dask.array import ma as dask_ma filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) elif isinstance(cube_data, np.ma.MaskedArray): filled_data = np.ma.filled(cube_data, get_fill_value(cube.dtype)) From 0dce7f3480f892b3e1d44608da335cf5f2f8f5c4 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 18:24:01 +0000 Subject: [PATCH 28/30] Updating to run the iris docs --- doc/environment.yml | 1 + doc/io.rst | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/doc/environment.yml b/doc/environment.yml index 45fa6417e16..b14fba351c1 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -17,3 +17,4 @@ dependencies: - rasterio=0.36.0 - sphinx-gallery - zarr + - iris diff --git a/doc/io.rst b/doc/io.rst index 7a3e1938d6f..c9547f40a2e 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -346,24 +346,26 @@ Iris The Iris_ tool allows easy reading of common meteorological and climate model formats (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is -installed xarray can convert a ``Cube`` into a ``DataArray`` using -:py:meth:`~xarray.Dataset.from_iris`: +installed xarray can convert a ``DataArray`` into a ``Cube`` using +:py:meth:`~xarray.DataArray.to_iris`: .. ipython:: python - :verbatim: - da_cube = xr.Dataset.from_iris(cube) - da_cube + da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], + coords=dict(x=[10, 20, 30, 40], + y=pd.date_range('2000-01-01', periods=5))) + cube = da.to_iris() + cube -Conversly, we can create a new cube object from a ``DataArray`` using -:py:meth:`~xarray.Dataset.to_iris`: +Conversely, we can create a new ``DataArray`` object from a ``Cube`` using +:py:meth:`~xarray.DataArray.from_iris`: .. ipython:: python - :verbatim: - cube = da.to_iris() - cube + da_cube = xr.Dataarray.from_iris(cube) + da_cube + .. _Iris: http://scitools.org.uk/iris From 301b2efd3b51b1aba77569c95f65f49202130965 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Mon, 18 Dec 2017 18:24:48 +0000 Subject: [PATCH 29/30] Fix typo --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index c9547f40a2e..eac941b336b 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -363,7 +363,7 @@ Conversely, we can create a new ``DataArray`` object from a ``Cube`` using .. ipython:: python - da_cube = xr.Dataarray.from_iris(cube) + da_cube = xr.DataArray.from_iris(cube) da_cube From b62367ab42cf2d4db8d4d40504b920f2f0bcfbc7 Mon Sep 17 00:00:00 2001 From: Duncan Watson-Parris Date: Tue, 19 Dec 2017 20:57:07 +0000 Subject: [PATCH 30/30] Fixing long lines --- xarray/convert.py | 26 ++++++++++++-------- xarray/tests/test_dataarray.py | 43 ++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 5cb3f47a050..446bd5a0d35 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -10,15 +10,18 @@ from .core.pycompat import OrderedDict, range from .core.dtypes import get_fill_value from .conventions import ( - maybe_encode_timedelta, maybe_encode_datetime, decode_cf, decode_cf_variable) + maybe_encode_timedelta, maybe_encode_datetime, decode_cf) cdms2_ignored_attrs = {'name', 'tileIndex'} -iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', 'calendar', 'leap_month', 'leap_year', - 'month_lengths', 'coordinates', 'grid_mapping', 'climatology', 'cell_methods', 'formula_terms', - 'compress', 'missing_value', 'add_offset', 'scale_factor', 'valid_max', 'valid_min', - 'valid_range', '_FillValue'} -cell_methods_strings = {'point', 'sum', 'maximum', 'median', 'mid_range', 'minimum', 'mean', 'mode', - 'standard_deviation', 'variance'} +iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue'} +cell_methods_strings = {'point', 'sum', 'maximum', 'median', 'mid_range', + 'minimum', 'mean', 'mode', 'standard_deviation', + 'variance'} def encode(var): @@ -117,7 +120,8 @@ def to_iris(dataarray): args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = parse_cell_methods(dataarray.attrs['cell_methods']) + args['cell_methods'] = \ + parse_cell_methods(dataarray.attrs['cell_methods']) # Create the right type of masked array (should be easier after #1769) if isinstance(dataarray.data, dask_array_type): @@ -182,11 +186,13 @@ def from_iris(cube): coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] if not coord.var_name: - raise ValueError("Coordinate '{}' has no var_name attribute".format(coord.name())) + raise ValueError("Coordinate '{}' has no " + "var_name attribute".format(coord.name())) if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: - coords[coord.var_name] = ((), np.asscalar(coord.points), coord_attrs) + coords[coord.var_name] = ((), + np.asscalar(coord.points), coord_attrs) array_attrs = _iris_obj_to_attrs(cube) cell_methods = _iris_cell_methods_to_str(cube.cell_methods) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 795513598e5..0399002c6e6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2863,19 +2863,18 @@ def test_to_and_from_iris(self): coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) - original = DataArray(np.arange(6, dtype='float').reshape(2, 3), coord_dict, - name='Temperature', attrs={'baz': 123, - 'units': 'Kelvin', - 'standard_name': - 'fire_temperature', - 'long_name': - 'Fire Temperature'}, + original = DataArray(np.arange(6, dtype='float').reshape(2, 3), + coord_dict, name='Temperature', + attrs={'baz': 123, 'units': 'Kelvin', + 'standard_name': 'fire_temperature', + 'long_name': 'Fire Temperature'}, dims=('distance', 'time')) # Set a bad value to test the masking logic original.data[0, 2] = np.NaN - original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' + original.attrs['cell_methods'] = \ + 'height: mean (comment: A cell method)' actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) self.assertEqual(actual.var_name, original.name) @@ -2885,7 +2884,8 @@ def test_to_and_from_iris(self): (iris.coords.CellMethod(method='mean', coords=('height',), intervals=(), - comments=('A cell method',)),)) + comments=('A cell method',)),) + ) for coord, orginal_key in zip((actual.coords()), original.coords): original_coord = original.coords[orginal_key] @@ -2930,23 +2930,24 @@ def test_to_and_from_iris_dask(self): coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) - original = DataArray(da.from_array(np.arange(-1, 5, dtype='float').reshape(2, 3), 3), coord_dict, - name='Temperature', attrs={'baz': 123, - 'units': 'Kelvin', - 'standard_name': - 'fire_temperature', - 'long_name': - 'Fire Temperature'}, + original = DataArray(da.from_array( + np.arange(-1, 5, dtype='float').reshape(2, 3), 3), coord_dict, + name='Temperature', + attrs={'baz': 123, 'units': 'Kelvin', + 'standard_name': 'fire_temperature', + 'long_name': 'Fire Temperature'}, dims=('distance', 'time')) # Set a bad value to test the masking logic original.data = da.ma.masked_less(original.data, 0) - original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' + original.attrs['cell_methods'] = \ + 'height: mean (comment: A cell method)' actual = original.to_iris() # Be careful not to trigger the loading of the iris data - actual_data = actual.core_data() if hasattr(actual, 'core_data') else actual.data + actual_data = actual.core_data() if \ + hasattr(actual, 'core_data') else actual.data self.assertArrayEqual(actual_data, original.data) self.assertEqual(actual.var_name, original.name) self.assertItemsEqual([d.var_name for d in actual.dim_coords], @@ -2955,7 +2956,8 @@ def test_to_and_from_iris_dask(self): (iris.coords.CellMethod(method='mean', coords=('height',), intervals=(), - comments=('A cell method',)),)) + comments=('A cell method',)),) + ) for coord, orginal_key in zip((actual.coords()), original.coords): original_coord = original.coords[orginal_key] @@ -2979,7 +2981,8 @@ def test_to_and_from_iris_dask(self): # If the Iris version supports it then we should get a dask array back if hasattr(actual, 'core_data'): pass - # TODO This currently fails due to the decoding loading the data (#1372) + # TODO This currently fails due to the decoding loading + # the data (#1372) # self.assertEqual(type(original.data), type(roundtripped.data)) actual.remove_coord('time')