diff --git a/README.rst b/README.rst index a92afe14bb9..6584a1a6005 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ xray: N-D labeled arrays and datasets in Python .. image:: https://travis-ci.org/xray/xray.svg?branch=master :target: https://travis-ci.org/xray/xray -.. image:: http://img.shields.io/pypi/v/xray.svg?style=flat +.. image:: https://badge.fury.io/py/xray.svg :target: https://pypi.python.org/pypi/xray/ **xray** is an open source project and Python package that aims to bring the @@ -108,4 +108,4 @@ See the License for the specific language governing permissions and limitations under the License. xray includes portions of pandas. The license for pandas is included in the -LICENSES directory. +licenses directory. diff --git a/doc/api.rst b/doc/api.rst index 4043639d87b..a946e518fd2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -37,7 +37,7 @@ Attributes :toctree: generated/ Dataset.dims - Dataset.vars + Dataset.data_vars Dataset.coords Dataset.attrs diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 936b5d2d5b8..54d9ebd7ae9 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -192,31 +192,34 @@ from the `netCDF`__ file format. 
__ http://www.unidata.ucar.edu/software/netcdf/ In addition to the dict-like interface of the dataset itself, which can be used -to access any array in a dataset, datasets have four key properties: +to access any variable in a dataset, datasets have four key properties: - ``dims``: a dictionary mapping from dimension names to the fixed length of each dimension (e.g., ``{'x': 6, 'y': 6, 'time': 8}``) -- ``vars``: a dict-like container of arrays (`variables`) -- ``coords``: another dict-like container of arrays intended to label points - used in ``vars`` (e.g., 1-dimensional arrays of numbers, datetime objects or - strings) +- ``data_vars``: a dict-like container of DataArrays corresponding to variables +- ``coords``: another dict-like container of DataArrays intended to label points + used in ``data_vars`` (e.g., 1-dimensional arrays of numbers, datetime + objects or strings) - ``attrs``: an ``OrderedDict`` to hold arbitrary metadata -The distinction between whether an array falls in variables or coordinates is -**mostly semantic**: coordinates are intended for constant/fixed/independent -quantities, unlike the varying/measured/dependent quantities that belong in -variables. Dictionary like access on a dataset will supply arrays found in -either category. However, the distinction does have important implications for -indexing and computation. +The distinction between whether a variable falls in data or coordinates +(borrowed from `CF conventions`_) is mostly semantic, and you can probably get +away with ignoring it if you like: dictionary like access on a dataset will +supply variables found in either category. However, xray does make use of the +distinction for indexing and computations. Coordinates indicate +constant/fixed/independent quantities, unlike the varying/measured/dependent +quantities that belong in data. + +.. _CF conventions: http://cfconventions.org/ Here is an example of how we might structure a dataset for a weather forecast: .. 
image:: _static/dataset-diagram.png In this example, it would be natural to call ``temperature`` and -``precipitation`` "variables" and all the other arrays "coordinates" because -they label the points along the dimensions. (see [1]_ for more background on -this example). +``precipitation`` "data variables" and all the other arrays "coordinate +variables" because they label the points along the dimensions. (see [1]_ for +more background on this example). .. _dataarray constructor: @@ -224,12 +227,12 @@ Creating a Dataset ~~~~~~~~~~~~~~~~~~ To make an :py:class:`~xray.Dataset` from scratch, supply dictionaries for any -variables coordinates and attributes you would like to insert into the +variables, coordinates and attributes you would like to insert into the dataset. For the ``vars`` and ``coords`` arguments, keys should be the name of the -variable or coordinate, and values should be scalars, 1d arrays or tuples of -the form ``(dims, data[, attrs])`` sufficient to label each array: +variable and values should be scalars, 1d arrays or tuples of the form +``(dims, data[, attrs])`` sufficient to label each array: - ``dims`` should be a sequence of strings. - ``data`` should be a numpy.ndarray (or array-like object) that has a @@ -292,15 +295,15 @@ values given by :py:class:`xray.DataArray` objects: ds['temperature'] -The valid keys include each listed coordinate and variable. +The valid keys include each listed coordinate and data variable. -Variables and coordinates are also contained separately in the -:py:attr:`~xray.Dataset.vars` and :py:attr:`~xray.Dataset.coords` +Data and coordinate variables are also contained separately in the +:py:attr:`~xray.Dataset.data_vars` and :py:attr:`~xray.Dataset.coords` dictionary-like attributes: .. 
ipython:: python - ds.vars + ds.data_vars ds.coords Finally, like data arrays, datasets also store arbitrary metadata in the form @@ -317,6 +320,16 @@ xray does not enforce any restrictions on attributes, but serialization to some file formats may fail if you use objects that are not strings, numbers or :py:class:`numpy.ndarray` objects. +As a useful shortcut, you can use attribute style access for reading (but not +setting) variables and attributes: + +.. ipython:: python + + ds.temperature + +This is particularly useful in an exploratory context, because you can +tab-complete these variable names with tools like IPython. + Dictionary like methods ~~~~~~~~~~~~~~~~~~~~~~~ @@ -381,7 +394,7 @@ Another useful option is the ability to rename the variables in a dataset: Coordinates ----------- -Coordinates are ancillary arrays stored for ``DataArray`` and ``Dataset`` +Coordinates are ancillary variables stored for ``DataArray`` and ``Dataset`` objects in the ``coords`` attribute: .. ipython:: python @@ -421,12 +434,12 @@ dimension and whose the values are ``Index`` objects: ds.indexes -Switching between coordinates and variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Switching between data and coordinate variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To entirely add or removing coordinate arrays, you can use dictionary like -syntax, as shown above. To convert back and forth between coordinates and -variables, use the the :py:meth:`~xray.Dataset.set_coords` and +syntax, as shown above. To convert back and forth between data and +coordinates, use the :py:meth:`~xray.Dataset.set_coords` and :py:meth:`~xray.Dataset.reset_coords` methods: .. 
ipython:: python diff --git a/doc/examples/weather-data.rst b/doc/examples/weather-data.rst index 967e7d100df..f168920ab72 100644 --- a/doc/examples/weather-data.rst +++ b/doc/examples/weather-data.rst @@ -36,7 +36,7 @@ Examine a dataset with pandas_ and seaborn_ @savefig examples_pairplot.png sns.pairplot(ds[['tmin', 'tmax', 'time.month']].to_dataframe(), - vars=ds.vars, hue='time.month') + vars=ds.data_vars, hue='time.month') Probability of freeze by calendar month diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 460a8532f13..d90a79b6095 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,9 +16,23 @@ Highlights ~~~~~~~~~~ - Automatic alignment of index labels in arithmetic, dataset cosntruction and - merging. -- Aggregation operations skip missing values by default. + merging. TODO: finish documenting. +- Aggregation operations now skip missing values by default: + + .. ipython:: python + + DataArray([1, 2, np.nan, 3]).mean() + + You can turn this behavior off by supplying the keyword argument + ``skipna=False``. +- You will need to update your code if you have been ignoring deprecation + warnings: methods and attributes that were deprecated in xray v0.3 or earlier + have gone away. - Lots of bug fixes. + +Enhancements +~~~~~~~~~~~~ + - Support for reindexing with a fill method. This will especially useful with pandas 0.16, which will support a fill method of ``'nearest'``. 
diff --git a/xray/conventions.py b/xray/conventions.py index 1386f95fb56..2c3673faca2 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -742,7 +742,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, from .backends.common import AbstractDataStore if isinstance(obj, Dataset): - vars = obj._arrays + vars = obj._variables attrs = obj.attrs extra_coords = set(obj.coords) file_obj = obj._file_obj @@ -855,7 +855,7 @@ def encode_dataset_coordinates(dataset): attrs : dict """ non_dim_coord_names = set(dataset.coords) - set(dataset.dims) - return _encode_coordinates(dataset._arrays, dataset.attrs, + return _encode_coordinates(dataset._variables, dataset.attrs, non_dim_coord_names=non_dim_coord_names) diff --git a/xray/core/alignment.py b/xray/core/alignment.py index 0850a12274f..34601c66d26 100644 --- a/xray/core/alignment.py +++ b/xray/core/alignment.py @@ -1,6 +1,5 @@ import functools import operator -import warnings from collections import defaultdict import numpy as np diff --git a/xray/core/coordinates.py b/xray/core/coordinates.py index e7eba17819b..55c0e91011d 100644 --- a/xray/core/coordinates.py +++ b/xray/core/coordinates.py @@ -51,7 +51,7 @@ def __getitem__(self, key): def __iter__(self): # needs to be in the same order as the dataset variables - for k in self._dataset._arrays: + for k in self._dataset._variables: if k in self._names: yield k @@ -84,7 +84,7 @@ def to_index(self, ordered_dims=None): """ if ordered_dims is None: ordered_dims = self.dims - indexes = [self._dataset._arrays[k].to_index() for k in ordered_dims] + indexes = [self._dataset._variables[k].to_index() for k in ordered_dims] return pd.MultiIndex.from_product(indexes, names=list(ordered_dims)) def _merge_validate(self, other): @@ -96,7 +96,7 @@ def _merge_validate(self, other): promote_dims = {} for k in self: if k in other: - self_var = self._dataset._arrays[k] + self_var = self._dataset._variables[k] other_var = other[k].variable if not 
self_var.broadcast_equals(other_var): if k in self.dims and k in other.dims: @@ -182,7 +182,7 @@ def __init__(self, dataarray): def __setitem__(self, key, value): with self._dataarray._set_new_dataset() as ds: ds.coords[key] = value - bad_dims = [d for d in ds._arrays[key].dims + bad_dims = [d for d in ds._variables[key].dims if d not in self.dims] if bad_dims: raise ValueError('DataArray does not include all coordinate ' diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index d9276364164..1258ea6171f 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -1,6 +1,5 @@ import contextlib import functools -import warnings import pandas as pd @@ -214,17 +213,6 @@ def _with_replaced_dataset(self, dataset): obj._dataset = dataset return obj - @property - def dataset(self): - """The dataset with which this DataArray is associated. - """ - warnings.warn("the 'dataset' property has been deprecated; " - 'to convert a DataArray into a Dataset, use ' - 'to_dataset(), or to modify DataArray coordiantes in ' - "place, use the 'coords' property", - FutureWarning, stacklevel=2) - return self._dataset - def to_dataset(self, name=None): """Convert a DataArray to a Dataset @@ -265,7 +253,7 @@ def name(self, value): @property def variable(self): - return self._dataset._arrays[self.name] + return self._dataset._variables[self.name] @property def dtype(self): @@ -299,11 +287,6 @@ def values(self, value): def _in_memory(self): return self.variable._in_memory - @property - def as_index(self): - utils.alias_warning('as_index', 'to_index()') - return self.to_index() - def to_index(self): """Convert this variable to a pandas.Index. Only possible for 1D arrays. 
@@ -326,12 +309,6 @@ def dims(self, value): if self.name in name_map: self._name = name_map[self.name] - @property - def dimensions(self): - """Deprecated; use dims instead""" - utils.alias_warning('dimensions', 'dims') - return self.dims - def _item_key_to_dict(self, key): if utils.is_dict_like(key): return key @@ -370,16 +347,6 @@ def loc(self): """ return _LocIndexer(self) - @property - def attributes(self): - utils.alias_warning('attributes', 'attrs') - return self.variable.attrs - - @attributes.setter - def attributes(self, value): - utils.alias_warning('attributes', 'attrs') - self.variable.attrs = value - @property def attrs(self): """Dictionary storing arbitrary metadata with this array.""" @@ -411,11 +378,6 @@ def coords(self): """ return DataArrayCoordinates(self) - @property - def coordinates(self): - utils.alias_warning('coordinates', 'coords') - return self.coords - def reset_coords(self, names=None, drop=False, inplace=False): """Given names of coordinates, reset them to become variables. @@ -491,8 +453,6 @@ def isel(self, **indexers): ds = self._dataset.isel(**indexers) return self._with_replaced_dataset(ds) - indexed = utils.function_alias(isel, 'indexed') - def sel(self, **indexers): """Return a new DataArray whose dataset is given by selecting index labels along the specified dimension(s). @@ -504,8 +464,6 @@ def sel(self, **indexers): """ return self.isel(**indexing.remap_label_indexers(self, indexers)) - labeled = utils.function_alias(sel, 'labeled') - def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. @@ -601,33 +559,6 @@ def rename(self, new_name_or_name_dict): renamed_dataset = self._dataset.rename(name_dict) return renamed_dataset[new_name] - def select_vars(self, *names): - """Returns a new DataArray with only the named variables, as well - as this DataArray's array variable (and all associated coordinates). 
- - See Also - -------- - Dataset.select_vars - """ - warnings.warn('select_vars has been deprecated; use ' - 'reset_coords(drop=True) instead', - FutureWarning, stacklevel=2) - names = names + (self.name,) - ds = self._dataset.select_vars(*names) - return self._with_replaced_dataset(ds) - - select = utils.function_alias(select_vars, 'select') - - def drop_vars(self, *names): - """Deprecated; use reset_coords(names, drop=True) instead - """ - warnings.warn('DataArray.drop_vars has been deprecated; use ' - 'reset_coords(names, drop=True) instead', - FutureWarning, stacklevel=2) - return self.reset_coords(names, drop=True) - - unselect = utils.function_alias(drop_vars, 'unselect') - def groupby(self, group, squeeze=True): """Returns a GroupBy object for performing grouped operations. @@ -761,27 +692,16 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs): DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. 
""" - if 'dimension' in kwargs and dim is None: - dim = kwargs.pop('dimension') - utils.alias_warning('dimension', 'dim') - var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs) drop = set(self.dims) - set(var.dims) # remove all variables associated with any dropped dimensions - drop |= set(k for k, v in iteritems(self._dataset._arrays) + drop |= set(k for k, v in iteritems(self._dataset._variables) if any(dim in drop for dim in v.dims)) ds = self._dataset.drop_vars(*drop) ds[self.name] = var return self._with_replaced_dataset(ds) - @classmethod - def concat(cls, *args, **kwargs): - """Deprecated; use xray.concat instead""" - warnings.warn('xray.DataArray.concat has been deprecated; use ' - 'xray.concat instead', FutureWarning, stacklevel=2) - return cls._concat(*args, **kwargs) - @classmethod def _concat(cls, arrays, dim='concat_dim', indexers=None, mode='different', concat_over=None, compat='equals'): diff --git a/xray/core/dataset.py b/xray/core/dataset.py index e2424e39f41..a97f426f35b 100644 --- a/xray/core/dataset.py +++ b/xray/core/dataset.py @@ -172,22 +172,22 @@ def _as_dataset_variable(name, var): return var -def _align_variables(arrays, join='outer'): +def _align_variables(variables, join='outer'): """Align all DataArrays in the provided dict, leaving other values alone. """ - alignable = [k for k, v in arrays.items() if hasattr(v, 'indexes')] - aligned = align(*[arrays[a] for a in alignable], + alignable = [k for k, v in variables.items() if hasattr(v, 'indexes')] + aligned = align(*[variables[a] for a in alignable], join=join, copy=False) - new_arrays = OrderedDict(arrays) - new_arrays.update(zip(alignable, aligned)) - return new_arrays + new_variables = OrderedDict(variables) + new_variables.update(zip(alignable, aligned)) + return new_variables def _expand_variables(raw_variables, old_variables={}, compat='identical'): """Expand a dictionary of variables. 
Returns a dictionary of Variable objects suitable for inserting into a - Dataset._arrays dictionary. + Dataset._variables dictionary. This includes converting tuples (dims, data) into Variable objects, converting coordinate variables into Coordinate objects and expanding @@ -255,10 +255,10 @@ def _calculate_dims(variables): def _merge_expand(aligned_self, other, overwrite_vars, compat): - possible_conflicts = dict((k, v) for k, v in aligned_self._arrays.items() + possible_conflicts = dict((k, v) for k, v in aligned_self._variables.items() if k not in overwrite_vars) new_vars, new_coord_names = _expand_variables(other, possible_conflicts, compat) - replace_vars = aligned_self._arrays.copy() + replace_vars = aligned_self._variables.copy() replace_vars.update(new_vars) return replace_vars, new_vars, new_coord_names @@ -267,7 +267,7 @@ def _merge_dataset(self, other, overwrite_vars, compat, join): aligned_self, other = partial_align(self, other, join=join, copy=False) replace_vars, new_vars, new_coord_names = _merge_expand( - aligned_self, other._arrays, overwrite_vars, compat) + aligned_self, other._variables, overwrite_vars, compat) new_coord_names.update(other._coord_names) return replace_vars, new_vars, new_coord_names @@ -310,14 +310,14 @@ def __init__(self, dataset): self._dataset = dataset def __iter__(self): - return (key for key in self._dataset._arrays + return (key for key in self._dataset._variables if key not in self._dataset._coord_names) def __len__(self): - return len(self._dataset._arrays) - len(self._dataset._coord_names) + return len(self._dataset._variables) - len(self._dataset._coord_names) def __contains__(self, key): - return (key in self._dataset._arrays + return (key in self._dataset._variables and key not in self._dataset._coord_names) def __getitem__(self, key): @@ -356,7 +356,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, AttrAccessMixin): # class properties defined for the benefit of __setstate__, which otherwise # runs into trouble 
because we overrode __getattr__ _attrs = None - _arrays = Frozen({}) + _variables = Frozen({}) def __init__(self, variables=None, coords=None, attrs=None, compat='broadcast_equals'): @@ -374,7 +374,7 @@ def __init__(self, variables=None, coords=None, attrs=None, coords : dict-like, optional Another mapping in the same form as the `variables` argument, except the each item is saved on the dataset as a "coordinate". - These arrays have an associated meaning: they describe + These variables have an associated meaning: they describe constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in `variables`. Coordinates values may be given by 1-dimensional arrays or scalars, @@ -384,7 +384,7 @@ def __init__(self, variables=None, coords=None, attrs=None, attrs : dict-like, optional Global attributes to save on this dataset. """ - self._arrays = OrderedDict() + self._variables = OrderedDict() self._coord_names = set() self._dims = {} self._attrs = None @@ -399,52 +399,52 @@ def __init__(self, variables=None, coords=None, attrs=None, self.attrs = attrs def _add_missing_coords_inplace(self): - """Add missing coordinates to self._arrays + """Add missing coordinates to self._variables """ for dim, size in iteritems(self.dims): - if dim not in self._arrays: + if dim not in self._variables: # This is equivalent to np.arange(size), but # waits to create the array until its actually accessed. data = indexing.LazyIntegerRange(size) coord = variable.Coordinate(dim, data) - self._arrays[dim] = coord + self._variables[dim] = coord - def _update_vars_and_coords(self, new_arrays, new_coord_names={}, + def _update_vars_and_coords(self, new_variables, new_coord_names={}, needs_copy=True, check_coord_names=True): """Add a dictionary of new variables to this dataset. Raises a ValueError if any dimensions have conflicting lengths in the - new dataset. Otherwise will update this dataset's _arrays and + new dataset. 
Otherwise will update this dataset's _variables and _dims attributes in-place. Set `needs_copy=False` only if this dataset is brand-new and hence can be thrown away if this method fails. """ - # default to creating another copy of arrays so can unroll if we end + # default to creating another copy of variables so can unroll if we end # up with inconsistent dimensions - arrays = self._arrays.copy() if needs_copy else self._arrays + variables = self._variables.copy() if needs_copy else self._variables if check_coord_names: - _assert_empty([k for k in self.vars if k in new_coord_names], + _assert_empty([k for k in self.data_vars if k in new_coord_names], 'coordinates with these names already exist as ' 'variables: %s') - arrays.update(new_arrays) - dims = _calculate_dims(arrays) + variables.update(new_variables) + dims = _calculate_dims(variables) # all checks are complete: it's safe to update - self._arrays = arrays + self._variables = variables self._dims = dims self._add_missing_coords_inplace() self._coord_names.update(new_coord_names) def _set_init_vars_and_dims(self, vars, coords, compat): - """Set the initial value of Dataset arrays and dimensions + """Set the initial value of Dataset variables and dimensions """ _assert_empty([k for k in vars if k in coords], 'redundant variables and coordinates: %s') - arrays = ChainMap(vars, coords) + variables = ChainMap(vars, coords) - aligned = _align_variables(arrays) + aligned = _align_variables(variables) new_variables, new_coord_names = _expand_variables(aligned, compat=compat) @@ -489,22 +489,10 @@ def __getstate__(self): @property def variables(self): - """Deprecated; do not use""" - warnings.warn('the Dataset property `variables` has been deprecated; ' - 'use the dataset itself instead', - FutureWarning, stacklevel=2) - return Frozen(self._arrays) - - @property - def attributes(self): - """Deprecated; do not use""" - utils.alias_warning('attributes', 'attrs', 3) - return self.attrs - - @attributes.setter - def 
attributes(self, value): - utils.alias_warning('attributes', 'attrs', 3) - self.attrs = value + """Frozen dictionary of xray.Variable objects constituting this + dataset's data + """ + return Frozen(self._variables) def _attrs_copy(self): return None if self._attrs is None else OrderedDict(self._attrs) @@ -530,11 +518,6 @@ def dims(self): """ return Frozen(SortedKeysDict(self._dims)) - @property - def dimensions(self): - utils.alias_warning('dimensions', 'dims') - return self.dims - def load_data(self): """Manually trigger loading of this dataset's data from disk or a remote source into memory and return this dataset. @@ -544,7 +527,7 @@ def load_data(self): load data automatically. However, this method can be necessary when working with many file objects on disk. """ - for v in itervalues(self._arrays): + for v in itervalues(self._variables): v.load_data() return self @@ -555,7 +538,7 @@ def _construct_direct(cls, variables, coord_names, dims, attrs, costly validation """ obj = object.__new__(cls) - obj._arrays = variables + obj._variables = variables obj._coord_names = coord_names obj._dims = dims obj._attrs = attrs @@ -601,9 +584,9 @@ def copy(self, deep=False): """ if deep: variables = OrderedDict((k, v.copy(deep=True)) - for k, v in iteritems(self._arrays)) + for k, v in iteritems(self._variables)) else: - variables = self._arrays.copy() + variables = self._variables.copy() # skip __init__ to avoid costly validation return self._construct_direct(variables, self._coord_names.copy(), self._dims.copy(), self._attrs_copy()) @@ -612,31 +595,31 @@ def _copy_listed(self, names, keep_attrs=True): """Create a new Dataset with the listed variables from this dataset and the all relevant coordinates. Skips all validation. 
""" - arrays = OrderedDict() + variables = OrderedDict() coord_names = set() for name in names: try: - arrays[name] = self._arrays[name] + variables[name] = self._variables[name] except KeyError: - ref_name, var = _get_virtual_variable(self._arrays, name) - arrays[name] = var + ref_name, var = _get_virtual_variable(self._variables, name) + variables[name] = var if ref_name in self._coord_names: coord_names.add(name) needed_dims = set() - for v in arrays.values(): + for v in variables.values(): needed_dims.update(v._dims) for k in self._coord_names: - if set(self._arrays[k]._dims) <= needed_dims: - arrays[k] = self._arrays[k] + if set(self._variables[k]._dims) <= needed_dims: + variables[k] = self._variables[k] coord_names.add(k) dims = dict((k, self._dims[k]) for k in needed_dims) attrs = self.attrs.copy() if keep_attrs else None - return self._construct_direct(arrays, coord_names, dims, attrs) + return self._construct_direct(variables, coord_names, dims, attrs) def __copy__(self): return self.copy(deep=False) @@ -650,13 +633,13 @@ def __contains__(self, key): """The 'in' operator will return true or false depending on whether 'key' is an array in the dataset or not. """ - return key in self._arrays + return key in self._variables def __len__(self): - return len(self._arrays) + return len(self._variables) def __iter__(self): - return iter(self._arrays) + return iter(self._variables) @property def loc(self): @@ -670,7 +653,7 @@ def virtual_variables(self): """A frozenset of names that don't exist in this dataset but for which DataArrays could be created on demand. - These arrays can be derived by performing simple operations on an + These variables can be derived by performing simple operations on an existing dataset variable or coordinate. Currently, the only implemented virtual variables are time/date components [1_] such as "time.month" or "time.dayofyear", where "time" is the name of a index @@ -682,7 +665,7 @@ def virtual_variables(self): ---------- .. 
[1] http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components """ - return _list_virtual_variables(self._arrays) + return _list_virtual_variables(self._variables) def __getitem__(self, key): """Access variables or coordinates this dataset as a @@ -723,17 +706,15 @@ def __delitem__(self, key): If this variable is a dimension, all variables containing this dimension are also removed. """ - # nb. this method is intrinsically not very efficient because removing - # items from variables (an OrderedDict) takes O(n) time. def remove(k): - del self._arrays[k] + del self._variables[k] self._coord_names.discard(k) remove(key) if key in self._dims: del self._dims[key] - also_delete = [k for k, v in iteritems(self._arrays) + also_delete = [k for k, v in iteritems(self._variables) if key in v.dims] for key in also_delete: remove(key) @@ -747,7 +728,7 @@ def _all_compat(self, other, compat_str): # require matching order for equality compat = lambda x, y: getattr(x, compat_str)(y) return (self._coord_names == other._coord_names - and utils.dict_equiv(self._arrays, other._arrays, + and utils.dict_equiv(self._variables, other._variables, compat=compat)) def equals(self, other): @@ -791,36 +772,23 @@ def indexes(self): @property def coords(self): - """Dictionary of xray.Coordinate objects used for label based indexing. + """Dictionary of xray.DataArray objects corresponding to coordinate + variables """ return DatasetCoordinates(self) @property - def vars(self): - return Variables(self) - - @property - def coordinates(self): - utils.alias_warning('coordinates', 'coords') - return self.coords - - @property - def noncoords(self): - """Dictionary of DataArrays whose names do not match dimensions. 
+ def data_vars(self): + """Dictionary of xray.DataArray objects corresponding to data variables """ - warnings.warn('the Dataset property `noncoords` has been deprecated; ' - 'use `vars` instead', - FutureWarning, stacklevel=2) - return self.vars + return Variables(self) @property - def noncoordinates(self): - """Dictionary of DataArrays whose names do not match dimensions. - """ - warnings.warn('the Dataset property `noncoordinates` has been ' - 'deprecated; use `vars` instead', + def vars(self): + warnings.warn('the Dataset property `vars` has been deprecated; ' + 'use `data_vars` instead', FutureWarning, stacklevel=2) - return self.vars + return self.data_vars def set_coords(self, names, inplace=False): """Given names of one or more variables, set them as coordinates @@ -839,7 +807,7 @@ def set_coords(self, names, inplace=False): """ # TODO: allow inserting new coordinates with this method, like # DataFrame.set_index? - # nb. check in self._arrays, not self.noncoords to insure that the + # nb. check in self._variables, not self.data_vars to insure that the # operation is idempotent if isinstance(names, basestring): names = [names] @@ -880,7 +848,7 @@ def reset_coords(self, names=None, drop=False, inplace=False): obj._coord_names.difference_update(names) if drop: for name in names: - del obj._arrays[name] + del obj._variables[name] return obj def dump_to_store(self, store, encoder=None): @@ -952,13 +920,11 @@ def isel(self, **indexers): for k, v in iteritems(indexers)] variables = OrderedDict() - for name, var in iteritems(self._arrays): + for name, var in iteritems(self._variables): var_indexers = dict((k, v) for k, v in indexers if k in var.dims) variables[name] = var.isel(**var_indexers) return self._replace_vars_and_dims(variables) - indexed = utils.function_alias(isel, 'indexed') - def sel(self, **indexers): """Returns a new dataset with each array indexed by tick labels along the specified dimension(s). 
@@ -999,8 +965,6 @@ def sel(self, **indexers): """ return self.isel(**indexing.remap_label_indexers(self, indexers)) - labeled = utils.function_alias(sel, 'labeled') - def reindex_like(self, other, method=None, copy=True): """Conform this object onto the indexes of another object, filling in missing values with NaN. @@ -1082,7 +1046,7 @@ def reindex(self, indexers=None, method=None, copy=True, **kw_indexers): return self.copy(deep=True) if copy else self variables = alignment.reindex_variables( - self._arrays, self.indexes, indexers, method, copy=copy) + self.variables, self.indexes, indexers, method, copy=copy) return self._replace_vars_and_dims(variables) def rename(self, name_dict, inplace=False): @@ -1103,12 +1067,12 @@ def rename(self, name_dict, inplace=False): Dataset with renamed variables and dimensions. """ for k in name_dict: - if k not in self._arrays: + if k not in self: raise ValueError("cannot rename %r because it is not a " "variable in this dataset" % k) variables = OrderedDict() coord_names = set() - for k, v in iteritems(self._arrays): + for k, v in iteritems(self._variables): name = name_dict.get(k, k) dims = tuple(name_dict.get(dim, dim) for dim in v.dims) var = v.copy(deep=False) @@ -1119,7 +1083,7 @@ def rename(self, name_dict, inplace=False): if inplace: self._dims = _calculate_dims(variables) - self._arrays = variables + self._variables = variables self._coord_names = coord_names obj = self else: @@ -1223,23 +1187,13 @@ def merge(self, other, inplace=False, overwrite_vars=set(), return obj def _assert_all_in_dataset(self, names, virtual_okay=False): - bad_names = set(names) - set(self._arrays) + bad_names = set(names) - set(self._variables) if virtual_okay: bad_names -= self.virtual_variables if bad_names: raise ValueError('One or more of the specified variables ' 'cannot be found in this dataset') - def select_vars(self, *names): - """Deprecated. 
Index with a list instead: ``ds[['var1', 'var2']]`` - """ - warnings.warn('select_vars has been deprecated; index the dataset ' - 'with a list of variables instead', - FutureWarning, stacklevel=2) - return self._copy_listed(names) - - select = utils.function_alias(select_vars, 'select') - def drop_vars(self, *names): """Returns a new dataset without the named variables. @@ -1256,15 +1210,13 @@ def drop_vars(self, *names): """ self._assert_all_in_dataset(names) drop = set(names) - drop |= set(k for k, v in iteritems(self._arrays) + drop |= set(k for k, v in iteritems(self._variables) if any(name in v.dims for name in names)) - variables = OrderedDict((k, v) for k, v in iteritems(self._arrays) + variables = OrderedDict((k, v) for k, v in iteritems(self._variables) if k not in drop) coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) - unselect = utils.function_alias(drop_vars, 'unselect') - def groupby(self, group, squeeze=True): """Returns a GroupBy object for performing grouped operations. 
@@ -1322,9 +1274,9 @@ def transpose(self, *dims): 'permuted dataset dimensions (%s)' % (dims, tuple(self.dims))) ds = self.copy() - for name, var in iteritems(self._arrays): + for name, var in iteritems(self._variables): var_dims = tuple(dim for dim in dims if dim in var.dims) - ds._arrays[name] = var.transpose(*var_dims) + ds._variables[name] = var.transpose(*var_dims) return ds @property @@ -1388,13 +1340,13 @@ def dropna(self, dim, how='any', thresh=None, subset=None): raise ValueError('%s must be a single dataset dimension' % dim) if subset is None: - subset = list(self.vars) + subset = list(self.data_vars) count = np.zeros(self.dims[dim], dtype=int) size = 0 for k in subset: - array = self._arrays[k] + array = self._variables[k] if dim in array.dims: dims = [d for d in array.dims if d != dim] count += array.count(dims) @@ -1441,10 +1393,6 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if 'dimension' in kwargs and dim is None: - dim = kwargs.pop('dimension') - utils.alias_warning('dimension', 'dim') - if isinstance(dim, basestring): dims = set([dim]) elif dim is None: @@ -1456,7 +1404,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False, 'Dataset does not contain the dimensions: %s') variables = OrderedDict() - for name, var in iteritems(self._arrays): + for name, var in iteritems(self._variables): reduce_dims = [dim for dim in var.dims if dim in dims] if reduce_dims or not var.dims: if name not in self.coords: @@ -1505,17 +1453,10 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs): noncoordinate are dropped. 
""" variables = OrderedDict((k, func(v, *args, **kwargs)) - for k, v in iteritems(self.vars)) + for k, v in iteritems(self.data_vars)) attrs = self.attrs if keep_attrs else None return type(self)(variables, attrs=attrs) - @classmethod - def concat(cls, *args, **kwargs): - """Deprecated; use xray.concat instead""" - warnings.warn('xray.Dataset.concat has been deprecated; use ' - 'xray.concat instead', FutureWarning, stacklevel=2) - return cls._concat(*args, **kwargs) - @classmethod def _concat(cls, datasets, dim='concat_dim', indexers=None, mode='different', concat_over=None, compat='equals'): @@ -1551,11 +1492,11 @@ def differs(vname, v): # simple helper function which compares a variable # across all datasets and indicates whether that # variable differs or not. - return any(not ds._arrays[vname].equals(v) + return any(not ds._variables[vname].equals(v) for ds in datasets[1:]) # non_indexes = iteritems(datasets[0].nonindexes) # all nonindexes that are not the same in each dataset - concat_over.update(k for k, v in iteritems(datasets[0]._arrays) + concat_over.update(k for k, v in iteritems(datasets[0]._variables) if k not in datasets[0]._dims and differs(k, v)) elif mode == 'all': # concatenate all nonindexes @@ -1567,7 +1508,7 @@ def differs(vname, v): else: raise ValueError("Unexpected value for mode: %s" % mode) - if any(v not in datasets[0]._arrays for v in concat_over): + if any(v not in datasets[0]._variables for v in concat_over): raise ValueError('not all elements in concat_over %r found ' 'in the first dataset %r' % (concat_over, datasets[0])) @@ -1576,13 +1517,13 @@ def differs(vname, v): auto_concat_dims = set([dim_name]) if hasattr(dim, 'dims'): auto_concat_dims |= set(dim.dims) - for k, v in iteritems(datasets[0]._arrays): + for k, v in iteritems(datasets[0]._variables): if k == dim_name or auto_concat_dims.intersection(v.dims): concat_over.add(k) # create the new dataset and add constant variables concatenated = cls({}, attrs=datasets[0].attrs) - for 
k, v in iteritems(datasets[0]._arrays): + for k, v in iteritems(datasets[0]._variables): if k not in concat_over: concatenated[k] = v @@ -1592,10 +1533,10 @@ def differs(vname, v): if (compat == 'identical' and not utils.dict_equiv(ds.attrs, concatenated.attrs)): raise ValueError('dataset global attributes not equal') - for k, v in iteritems(ds._arrays): - if k not in concatenated._arrays and k not in concat_over: + for k, v in iteritems(ds._variables): + if k not in concatenated._variables and k not in concat_over: raise ValueError('encountered unexpected variable %r' % k) - elif (k in concatenated._arrays and k != dim_name and + elif (k in concatenated._variables and k != dim_name and not getattr(v, compat)(concatenated[k])): verb = 'equal' if compat == 'equals' else compat raise ValueError( @@ -1610,7 +1551,7 @@ def _ensure_common_dims(vars): # stack up each variable to fill-out the dataset for k in concat_over: - vars = _ensure_common_dims([ds._arrays[k] for ds in datasets]) + vars = _ensure_common_dims([ds._variables[k] for ds in datasets]) concatenated[k] = variable.Variable.concat(vars, dim, indexers) concatenated._coord_names.update(datasets[0].coords) @@ -1623,7 +1564,7 @@ def _ensure_common_dims(vars): def _to_dataframe(self, ordered_dims): columns = [k for k in self if k not in self.dims] - data = [self._arrays[k].set_dims(ordered_dims).values.reshape(-1) + data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1) for k in columns] index = self.coords.to_index(ordered_dims) return pd.DataFrame(OrderedDict(zip(columns, data)), index=index) @@ -1684,8 +1625,8 @@ def _unary_op(f): @functools.wraps(f) def func(self, *args, **kwargs): ds = self.coords.to_dataset() - for k in self.vars: - ds._arrays[k] = f(self._arrays[k], *args, **kwargs) + for k in self.data_vars: + ds._variables[k] = f(self._variables[k], *args, **kwargs) return ds return func @@ -1704,7 +1645,7 @@ def func(self, other): other_coords = getattr(other, 'coords', None) ds = 
self.coords.merge(other_coords) g = f if not reflexive else lambda x, y: f(y, x) - _calculate_binary_op(g, self, other, ds._arrays) + _calculate_binary_op(g, self, other, ds._variables) return ds return func @@ -1720,37 +1661,39 @@ def func(self, other): # can rollback in case of an exception # note: when/if we support automatic alignment, only copy the # variables that will actually be included in the result - dest_vars = dict((k, self._arrays[k].copy()) - for k in self.vars) + dest_vars = dict((k, self._variables[k].copy()) + for k in self.data_vars) _calculate_binary_op(f, dest_vars, other, dest_vars) - self._arrays.update(dest_vars) + self._variables.update(dest_vars) return self return func def _calculate_binary_op(f, dataset, other, dest_vars): - dataset_arrays = getattr(dataset, '_arrays', dataset) - dataset_vars = getattr(dataset, 'vars', dataset) + dataset_variables = getattr(dataset, 'variables', dataset) + dataset_data_vars = getattr(dataset, 'data_vars', dataset) if utils.is_dict_like(other): - other_arrays = getattr(other, '_arrays', other) - other_vars = getattr(other, 'vars', other) + other_variables = getattr(other, 'variables', other) + other_data_vars = getattr(other, 'data_vars', other) performed_op = False - for k in dataset_vars: - if k in other_vars: - dest_vars[k] = f(dataset_arrays[k], other_arrays[k]) + for k in dataset_data_vars: + if k in other_data_vars: + dest_vars[k] = f(dataset_variables[k], other_variables[k]) performed_op = True elif k in dest_vars: # we are doing an in-place operation - raise ValueError('datasets must have the same variables for ' - 'in-place arithmetic operations: %s, %s' - % (list(dataset_vars), list(other_vars))) + raise ValueError('datasets must have the same data variables ' + 'for in-place arithmetic operations: %s, %s' + % (list(dataset_data_vars), + list(other_data_vars))) if not performed_op: raise ValueError('datasets have no overlapping variables: %s, %s' - % (list(dataset_vars), list(other_vars))) + % 
(list(dataset_data_vars), + list(other_data_vars))) else: other_variable = getattr(other, 'variable', other) - for k in dataset_vars: - dest_vars[k] = f(dataset_arrays[k], other_variable) + for k in dataset_data_vars: + dest_vars[k] = f(dataset_variables[k], other_variable) ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xray/core/formatting.py b/xray/core/formatting.py index cac276e0b08..2f520750978 100644 --- a/xray/core/formatting.py +++ b/xray/core/formatting.py @@ -172,7 +172,7 @@ def _mapping_repr(mapping, title, summarizer, col_width=None): summarizer=summarize_coord) -vars_repr = functools.partial(_mapping_repr, title='Variables', +vars_repr = functools.partial(_mapping_repr, title='Data variables', summarizer=summarize_var) @@ -223,7 +223,7 @@ def dataset_repr(ds): summary.append('%s(%s)' % (dims_start, ', '.join(all_dim_strings))) summary.append(coords_repr(ds.coords, col_width=col_width)) - summary.append(vars_repr(ds.vars, col_width=col_width)) + summary.append(vars_repr(ds.data_vars, col_width=col_width)) if ds.attrs: summary.append(attrs_repr(ds.attrs)) diff --git a/xray/core/variable.py b/xray/core/variable.py index ac056fb1a65..6a8b9d22fc5 100644 --- a/xray/core/variable.py +++ b/xray/core/variable.py @@ -337,11 +337,6 @@ def to_coord(self): return Coordinate(self.dims, self._data, self._attrs, encoding=self._encoding, fastpath=True) - @property - def as_index(self): - utils.alias_warning('as_index', 'to_index()') - return self.to_index() - def to_index(self): """Convert this variable to a pandas.Index""" return self.to_coord().to_index() @@ -352,11 +347,6 @@ def dims(self): """ return self._dims - @property - def dimensions(self): - utils.alias_warning('dimensions', 'dims') - return self.dims - def _parse_dimensions(self, dims): if isinstance(dims, basestring): dims = (dims,) @@ -415,16 +405,6 @@ def __setitem__(self, key, value): key = self._item_key_to_tuple(key) self._data_cached()[key] = value - @property - def 
attributes(self): - utils.alias_warning('attributes', 'attrs', 3) - return self._attributes - - @attributes.setter - def attributes(self, value): - utils.alias_warning('attributes', 'attrs', 3) - self._attributes = OrderedDict(value) - @property def attrs(self): """Dictionary of local attributes on this variable. @@ -500,8 +480,6 @@ def isel(self, **indexers): key[i] = indexers[dim] return self[tuple(key)] - indexed = utils.function_alias(isel, 'indexed') - def transpose(self, *dims): """Return a new Variable object with transposed dimensions. @@ -625,10 +603,6 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, Array with summarized data and the indicated dimension(s) removed. """ - if 'dimension' in kwargs and dim is None: - dim = kwargs.pop('dimension') - utils.alias_warning('dimension', 'dim') - if dim is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dim' arguments") diff --git a/xray/test/__init__.py b/xray/test/__init__.py index 4ecd873628e..5cb8f78b616 100644 --- a/xray/test/__init__.py +++ b/xray/test/__init__.py @@ -100,8 +100,8 @@ def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08): self.assertEqual(sorted(d1, key=str), sorted(d2, key=str)) self.assertItemsEqual(d1.coords, d2.coords) for k in d1: - v1 = d1._arrays[k] - v2 = d2._arrays[k] + v1 = d1.variables[k] + v2 = d2.variables[k] self.assertVariableAllClose(v1, v2, rtol=rtol, atol=atol) def assertCoordinatesEqual(self, d1, d2): diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index de732edecdb..ce7932c8fbe 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -44,7 +44,7 @@ def test_properties(self): for k, v in iteritems(self.dv.coords): self.assertArrayEqual(v, self.ds.coords[k]) with self.assertRaises(AttributeError): - self.dv.dataset = self.ds + self.dv.dataset self.assertIsInstance(self.ds['x'].to_index(), pd.Index) with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): 
self.ds['foo'].to_index() diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py index 40e20320c6c..f618630a15a 100644 --- a/xray/test/test_dataset.py +++ b/xray/test/test_dataset.py @@ -70,7 +70,7 @@ def test_repr(self): * dim3 (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ... numbers (dim3) int64 0 1 2 0 0 1 1 2 2 3 - Variables: + Data variables: var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 -0.4289 ... var2 (dim1, dim2) float64 1.162 -1.097 -2.123 1.04 -0.4034 -0.126 -0.8375 -1.606 ... var3 (dim3, dim1) float64 0.5565 -0.2121 0.4563 1.545 -0.2397 0.1433 0.2538 ... @@ -84,7 +84,7 @@ def test_repr(self): Dimensions: () Coordinates: *empty* - Variables: + Data variables: *empty*""") actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n')) print(actual) @@ -97,7 +97,7 @@ def test_repr(self): Dimensions: () Coordinates: *empty* - Variables: + Data variables: foo float64 1.0""") actual = '\n'.join(x.rstrip() for x in repr(data).split('\n')) print(actual) @@ -186,7 +186,7 @@ def test_constructor_with_coords(self): Dataset({'a': ('x', [1])}, {'a': ('x', [1])}) ds = Dataset({}, {'a': ('x', [1])}) - self.assertFalse(ds.vars) + self.assertFalse(ds.data_vars) self.assertItemsEqual(ds.coords.keys(), ['x', 'a']) def test_properties(self): @@ -194,16 +194,16 @@ def test_properties(self): self.assertEqual(ds.dims, {'dim1': 8, 'dim2': 9, 'dim3': 10, 'time': 20}) - self.assertItemsEqual(ds, list(ds._arrays)) - self.assertItemsEqual(ds.keys(), list(ds._arrays)) + self.assertItemsEqual(ds, list(ds.variables)) + self.assertItemsEqual(ds.keys(), list(ds.variables)) self.assertEqual(len(ds), 8) - self.assertItemsEqual(ds.vars, ['var1', 'var2', 'var3']) - self.assertItemsEqual(ds.vars.keys(), ['var1', 'var2', 'var3']) - self.assertIn('var1', ds.vars) - self.assertNotIn('dim1', ds.vars) - self.assertNotIn('numbers', ds.vars) - self.assertEqual(len(ds.vars), 3) 
+ self.assertItemsEqual(ds.data_vars, ['var1', 'var2', 'var3']) + self.assertItemsEqual(ds.data_vars.keys(), ['var1', 'var2', 'var3']) + self.assertIn('var1', ds.data_vars) + self.assertNotIn('dim1', ds.data_vars) + self.assertNotIn('numbers', ds.data_vars) + self.assertEqual(len(ds.data_vars), 3) self.assertItemsEqual(ds.indexes, ['dim1', 'dim2', 'dim3', 'time']) self.assertEqual(len(ds.indexes), 4) @@ -236,7 +236,7 @@ def test_variable(self): a = Dataset() d = np.random.random((10, 3)) a['foo'] = (('time', 'x',), d) - self.assertTrue('foo' in a._arrays) + self.assertTrue('foo' in a.variables) self.assertTrue('foo' in a) a['bar'] = (('time', 'x',), d) # order of creation is preserved @@ -255,7 +255,7 @@ def test_modify_inplace(self): self.assertTrue('x' in a.coords) self.assertIsInstance(a.coords['x'].to_index(), pd.Index) - self.assertVariableIdentical(a.coords['x'], a._arrays['x']) + self.assertVariableIdentical(a.coords['x'], a.variables['x']) b = Dataset() b['x'] = ('x', vec, attributes) self.assertVariableIdentical(a['x'], b['x']) @@ -501,19 +501,19 @@ def test_isel(self): ret = data.isel(dim1=0) self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims) - self.assertItemsEqual(data.vars, ret.vars) + self.assertItemsEqual(data.data_vars, ret.data_vars) self.assertItemsEqual(data.coords, ret.coords) self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1']) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims) - self.assertItemsEqual(data.vars, ret.vars) + self.assertItemsEqual(data.data_vars, ret.data_vars) self.assertItemsEqual(data.coords, ret.coords) self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1']) ret = data.isel(time=0, dim1=0, dim2=slice(5)) self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims) - self.assertItemsEqual(data.vars, ret.vars) + self.assertItemsEqual(data.data_vars, ret.data_vars) self.assertItemsEqual(data.coords, ret.coords) 
self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1', 'time']) @@ -710,8 +710,8 @@ def test_copy(self): for copied in [data.copy(deep=False), copy(data)]: self.assertDatasetIdentical(data, copied) for k in data: - v0 = data._arrays[k] - v1 = copied._arrays[k] + v0 = data.variables[k] + v1 = copied.variables[k] self.assertIs(v0, v1) copied['foo'] = ('z', np.arange(5)) self.assertNotIn('foo', data) @@ -719,8 +719,8 @@ def test_copy(self): for copied in [data.copy(deep=True), deepcopy(data)]: self.assertDatasetIdentical(data, copied) for k in data: - v0 = data._arrays[k] - v1 = copied._arrays[k] + v0 = data.variables[k] + v1 = copied.variables[k] self.assertIsNot(v0, v1) def test_rename(self): @@ -728,7 +728,7 @@ def test_rename(self): newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'} renamed = data.rename(newnames) - variables = OrderedDict(data._arrays) + variables = OrderedDict(data.variables) for k, v in iteritems(newnames): variables[v] = variables.pop(k) @@ -741,7 +741,7 @@ def test_rename(self): self.assertVariableEqual(Variable(dims, v.values, v.attrs), renamed[k]) self.assertEqual(v.encoding, renamed[k].encoding) - self.assertEqual(type(v), type(renamed._arrays[k])) + self.assertEqual(type(v), type(renamed.variables[k])) self.assertTrue('var1' not in renamed) self.assertTrue('dim2' not in renamed) @@ -889,7 +889,7 @@ def test_merge_auto_align(self): def test_getitem(self): data = create_test_data() self.assertIsInstance(data['var1'], DataArray) - self.assertVariableEqual(data['var1'], data._arrays['var1']) + self.assertVariableEqual(data['var1'], data.variables['var1']) with self.assertRaises(KeyError): data['notfound'] with self.assertRaises(KeyError): @@ -914,7 +914,7 @@ def test_virtual_variables(self): self.assertVariableEqual(data['time.dayofyear'], Variable('time', 1 + np.arange(20))) self.assertArrayEqual(data['time.month'].values, - data._arrays['time'].to_index().month) + data.variables['time'].to_index().month) 
self.assertArrayEqual(data['time.season'].values, 1) # test virtual variable math self.assertArrayEqual(data['time.dayofyear'] + 1, 2 + np.arange(20)) @@ -1012,7 +1012,7 @@ def test_squeeze(self): def get_args(v): return [set(args[0]) & set(v.dims)] if args else [] expected = Dataset(dict((k, v.squeeze(*get_args(v))) - for k, v in iteritems(data._arrays))) + for k, v in iteritems(data.variables))) expected.set_coords(data.coords, inplace=True) self.assertDatasetIdentical(expected, data.squeeze(*args)) # invalid squeeze @@ -1153,7 +1153,7 @@ def rectify_dim_order(dataset): # return a new dataset with all variable dimensions tranposed into # the order in which they are found in `data` return Dataset(dict((k, v.transpose(*data[k].dims)) - for k, v in iteritems(dataset.vars)), + for k, v in iteritems(dataset.data_vars)), dataset.coords, attrs=dataset.attrs) for dim in ['dim1', 'dim2', 'dim3']: @@ -1403,7 +1403,7 @@ def test_reduce(self): actual = data.max() expected = Dataset(dict((k, v.max()) - for k, v in iteritems(data.vars))) + for k, v in iteritems(data.data_vars))) self.assertDatasetEqual(expected, actual) self.assertDatasetEqual(data.min(dim=['dim1']), @@ -1586,8 +1586,8 @@ def test_dataset_dataset_math(self): expected = ds.apply(lambda x: 2 * x) self.assertDatasetIdentical(expected, 2 * ds) self.assertDatasetIdentical(expected, ds + ds) - self.assertDatasetIdentical(expected, ds + ds.vars) - self.assertDatasetIdentical(expected, ds + dict(ds.vars)) + self.assertDatasetIdentical(expected, ds + ds.data_vars) + self.assertDatasetIdentical(expected, ds + dict(ds.data_vars)) actual = ds.copy(deep=True) actual += ds