From a84ea12d34a4d1a542a34abbcf2feb3937a6a282 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 12 Mar 2019 09:01:17 -0600 Subject: [PATCH 01/14] some docs updates (#2746) * Friendlier io title. * Fix lists. * Fix *args, **kwargs "inline emphasis..." * misc * Reference xarray_extras for csv writing. Closes #2289 * Add metpy accessor. Closes #461 * fix transpose docstring. Closes #2576 * Revert "Fix lists." This reverts commit 39983a5835612d7158ae91a9cce7196a03742983. * Revert "Fix *args, **kwargs" This reverts commit 1b9da35ef43e44ce7855f2ab8406a781c9a68933. * Add MetPy to related projects. * Add Weather and Climate specific page. * Add hvplot. * Note open_dataset, mfdataset open files as read-only (closes #2345). * Update metpy 1 Co-Authored-By: dcherian * Update doc/weather-climate.rst Co-Authored-By: dcherian --- doc/index.rst | 2 + doc/io.rst | 13 ++-- doc/plotting.rst | 4 + doc/related-projects.rst | 1 + doc/time-series.rst | 137 --------------------------------- doc/weather-climate.rst | 160 +++++++++++++++++++++++++++++++++++++++ doc/whats-new.rst | 2 +- xarray/backends/api.py | 14 ++++ xarray/core/dataarray.py | 13 ++-- xarray/core/dataset.py | 5 +- xarray/core/variable.py | 4 +- 11 files changed, 202 insertions(+), 153 deletions(-) create mode 100644 doc/weather-climate.rst diff --git a/doc/index.rst b/doc/index.rst index dbe911011cd..1d3bb110ddb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -52,6 +52,7 @@ Documentation * :doc:`reshaping` * :doc:`combining` * :doc:`time-series` +* :doc:`weather-climate` * :doc:`pandas` * :doc:`io` * :doc:`dask` @@ -70,6 +71,7 @@ Documentation reshaping combining time-series + weather-climate pandas io dask diff --git a/doc/io.rst b/doc/io.rst index 0dc5181f9b8..51c747189da 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -1,11 +1,11 @@ .. _io: -Serialization and IO -==================== +Reading and writing files +========================= xarray supports direct serialization and IO to several file formats, from simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` -format. +format (recommended). .. ipython:: python :suppress: @@ -739,11 +739,14 @@ options are listed on the PseudoNetCDF page. .. _PseudoNetCDF: http://github.com/barronh/PseudoNetCDF -Formats supported by Pandas ---------------------------- +CSV and other formats supported by Pandas +----------------------------------------- For more options (tabular formats and CSV files in particular), consider exporting your objects to pandas and using its broad range of `IO tools`_. +For CSV files, one might also consider `xarray_extras`_. + +.. _xarray_extras: https://xarray-extras.readthedocs.io/en/latest/api/csv.html .. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html diff --git a/doc/plotting.rst b/doc/plotting.rst index a705c683594..c8f568e516f 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -39,6 +39,10 @@ For more extensive plotting applications consider the following projects: data structures for building even complex visualizations easily." Includes native support for xarray objects. +- `hvplot `_: ``hvplot`` makes it very easy to produce + dynamic plots (backed by ``Holoviews`` or ``Geoviews``) by adding a ``hvplot`` + accessor to DataArrays. + - `Cartopy `_: Provides cartographic tools. 
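A minimal usage sketch of the ``hvplot`` accessor described in the plotting entry above, assuming the optional ``hvplot`` package is installed (it is not part of this patch); the example array is illustrative::

    import numpy as np
    import xarray as xr
    import hvplot.xarray  # noqa: F401 -- the import registers the .hvplot accessor

    da = xr.DataArray(np.random.randn(100), dims='time', name='signal')
    da.hvplot()  # returns an interactive HoloViews plot object
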
diff --git a/doc/related-projects.rst b/doc/related-projects.rst index c89e324ff7c..e899022e5d4 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -13,6 +13,7 @@ Geosciences - `aospy `_: Automated analysis and management of gridded climate data. - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meterology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. +- `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. - `MPAS-Analysis `_: Analysis for simulations produced with Model for Prediction Across Scales (MPAS) components and the Accelerated Climate Model for Energy (ACME). - `OGGM `_: Open Global Glacier Model - `Oocgcm `_: Analysis of large gridded geophysical datasets diff --git a/doc/time-series.rst b/doc/time-series.rst index 3249dad2ec6..53efcd45ba2 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -212,140 +212,3 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. For more examples of using grouped operations on a time dimension, see :ref:`toy weather data`. - - -.. _CFTimeIndex: - -Non-standard calendars and dates outside the Timestamp-valid range ------------------------------------------------------------------- - -Through the standalone ``cftime`` library and a custom subclass of -:py:class:`pandas.Index`, xarray supports a subset of the indexing -functionality enabled through the standard :py:class:`pandas.DatetimeIndex` for -dates from non-standard calendars commonly used in climate science or dates -using a standard calendar, but outside the `Timestamp-valid range`_ -(approximately between years 1678 and 2262). - -.. note:: - - As of xarray version 0.11, by default, :py:class:`cftime.datetime` objects - will be used to represent times (either in indexes, as a - :py:class:`~xarray.CFTimeIndex`, or in data arrays with dtype object) if - any of the following are true: - - - The dates are from a non-standard calendar - - Any dates are outside the Timestamp-valid range. - - Otherwise pandas-compatible dates from a standard calendar will be - represented with the ``np.datetime64[ns]`` data type, enabling the use of a - :py:class:`pandas.DatetimeIndex` or arrays with dtype ``np.datetime64[ns]`` - and their full set of associated features. - -For example, you can create a DataArray indexed by a time -coordinate with dates from a no-leap calendar and a -:py:class:`~xarray.CFTimeIndex` will automatically be used: - -.. ipython:: python - - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') - -xarray also includes a :py:func:`~xarray.cftime_range` function, which enables -creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For -instance, we can create the same dates and DataArray we created above using: - -.. ipython:: python - - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') - -For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: - -- `Partial datetime string indexing`_ using strictly `ISO 8601-format`_ partial - datetime strings: - -.. 
ipython:: python - - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) - -- Access of basic datetime components via the ``dt`` accessor (in this case - just "year", "month", "day", "hour", "minute", "second", "microsecond", - "season", "dayofyear", and "dayofweek"): - -.. ipython:: python - - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - -- Group-by operations based on datetime accessor attributes (e.g. by month of - the year): - -.. ipython:: python - - da.groupby('time.month').sum() - -- Interpolation using :py:class:`cftime.datetime` objects: - -.. ipython:: python - - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - -- Interpolation using datetime strings: - -.. ipython:: python - - da.interp(time=['0001-01-15', '0001-02-15']) - -- Differentiation: - -.. ipython:: python - - da.differentiate('time') - -- Serialization: - -.. ipython:: python - - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') - -- And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: - -.. ipython:: python - - da.resample(time='81T', closed='right', label='right', base=3).mean() - -.. note:: - - - For some use-cases it may still be useful to convert from - a :py:class:`~xarray.CFTimeIndex` to a :py:class:`pandas.DatetimeIndex`, - despite the difference in calendar types. The recommended way of doing this - is to use the built-in :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` - method: - - .. ipython:: python - :okwarning: - - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) - da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex - - However in this case one should use caution to only perform operations which - do not depend on differences between dates (e.g. differentiation, - interpolation, or upsampling with resample), as these could introduce subtle - and silent errors due to the difference in calendar types between the dates - encoded in your data and the dates stored in memory. - -.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations -.. _ISO 8601-format: https://en.wikipedia.org/wiki/ISO_8601 -.. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#partial-string-indexing diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst new file mode 100644 index 00000000000..1950ba62ffb --- /dev/null +++ b/doc/weather-climate.rst @@ -0,0 +1,160 @@ +.. _weather-climate: + +Weather and climate data +======================== + +.. ipython:: python + :suppress: + + import xarray as xr + +``xarray`` can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include automatic labelling of plots with descriptive names and units if proper metadata is present (see :ref:`plotting`) and support for non-standard calendars used in climate science through the ``cftime`` module (see :ref:`CFTimeIndex`). There are also a number of geosciences-focused projects that build on xarray (see :ref:`related-projects`). + +.. _Climate and Forecast (CF) conventions: http://cfconventions.org + +.. 
_metpy_accessor: + +CF-compliant coordinate variables +--------------------------------- + +`MetPy`_ adds a ``metpy`` accessor that allows accessing coordinates with appropriate CF metadata using generic names ``x``, ``y``, ``vertical`` and ``time``. There is also a `cartopy_crs` attribute that provides projection information, parsed from the appropriate CF metadata, as a `Cartopy`_ projection object. See `their documentation`_ for more information. + +.. _`MetPy`: https://unidata.github.io/MetPy/dev/index.html +.. _`their documentation`: https://unidata.github.io/MetPy/dev/tutorials/xarray_tutorial.html#coordinates +.. _`Cartopy`: https://scitools.org.uk/cartopy/docs/latest/crs/projections.html + +.. _CFTimeIndex: + +Non-standard calendars and dates outside the Timestamp-valid range +------------------------------------------------------------------ + +Through the standalone ``cftime`` library and a custom subclass of +:py:class:`pandas.Index`, xarray supports a subset of the indexing +functionality enabled through the standard :py:class:`pandas.DatetimeIndex` for +dates from non-standard calendars commonly used in climate science or dates +using a standard calendar, but outside the `Timestamp-valid range`_ +(approximately between years 1678 and 2262). + +.. note:: + + As of xarray version 0.11, by default, :py:class:`cftime.datetime` objects + will be used to represent times (either in indexes, as a + :py:class:`~xarray.CFTimeIndex`, or in data arrays with dtype object) if + any of the following are true: + + - The dates are from a non-standard calendar + - Any dates are outside the Timestamp-valid range. + + Otherwise pandas-compatible dates from a standard calendar will be + represented with the ``np.datetime64[ns]`` data type, enabling the use of a + :py:class:`pandas.DatetimeIndex` or arrays with dtype ``np.datetime64[ns]`` + and their full set of associated features. + +For example, you can create a DataArray indexed by a time +coordinate with dates from a no-leap calendar and a +:py:class:`~xarray.CFTimeIndex` will automatically be used: + +.. ipython:: python + + from itertools import product + from cftime import DatetimeNoLeap + dates = [DatetimeNoLeap(year, month, 1) for year, month in + product(range(1, 3), range(1, 13))] + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + +xarray also includes a :py:func:`~xarray.cftime_range` function, which enables +creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For +instance, we can create the same dates and DataArray we created above using: + +.. ipython:: python + + dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + +For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: + +- `Partial datetime string indexing`_ using strictly `ISO 8601-format`_ partial + datetime strings: + +.. ipython:: python + + da.sel(time='0001') + da.sel(time=slice('0001-05', '0002-02')) + +- Access of basic datetime components via the ``dt`` accessor (in this case + just "year", "month", "day", "hour", "minute", "second", "microsecond", + "season", "dayofyear", and "dayofweek"): + +.. ipython:: python + + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + +- Group-by operations based on datetime accessor attributes (e.g. by month of + the year): + +.. 
ipython:: python + + da.groupby('time.month').sum() + +- Interpolation using :py:class:`cftime.datetime` objects: + +.. ipython:: python + + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + +- Interpolation using datetime strings: + +.. ipython:: python + + da.interp(time=['0001-01-15', '0001-02-15']) + +- Differentiation: + +.. ipython:: python + + da.differentiate('time') + +- Serialization: + +.. ipython:: python + + da.to_netcdf('example-no-leap.nc') + xr.open_dataset('example-no-leap.nc') + +- And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: + +.. ipython:: python + + da.resample(time='81T', closed='right', label='right', base=3).mean() + +.. note:: + + + For some use-cases it may still be useful to convert from + a :py:class:`~xarray.CFTimeIndex` to a :py:class:`pandas.DatetimeIndex`, + despite the difference in calendar types. The recommended way of doing this + is to use the built-in :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` + method: + + .. ipython:: python + :okwarning: + + modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') + da = xr.DataArray(range(24), [('time', modern_times)]) + da + datetimeindex = da.indexes['time'].to_datetimeindex() + da['time'] = datetimeindex + + However in this case one should use caution to only perform operations which + do not depend on differences between dates (e.g. differentiation, + interpolation, or upsampling with resample), as these could introduce subtle + and silent errors due to the difference in calendar types between the dates + encoded in your data and the dates stored in memory. + +.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations +.. _ISO 8601-format: https://en.wikipedia.org/wiki/ISO_8601 +.. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#partial-string-indexing diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 260c20d0b31..1135613c911 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -96,7 +96,7 @@ Bug fixes from higher frequencies to lower frequencies. Datapoints outside the bounds of the original time coordinate are now filled with NaN (:issue:`2197`). By `Spencer Clark `_. -- Line plots with the `x` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. +- Line plots with the ``x`` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. (:issue:`27251). By `Tom Nicholas `_. - Subtracting a scalar ``cftime.datetime`` object from a :py:class:`CFTimeIndex` now results in a :py:class:`pandas.TimedeltaIndex` diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 61efcfdedf2..36baa9071c0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -247,6 +247,13 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, dataset : Dataset The newly created dataset. + Notes + ----- + ``open_dataset`` opens the file with read-only access. When you modify + values of a Dataset, even one linked to files on disk, only the in-memory + copy you are manipulating in xarray is modified: the original file on disk + is never touched. + See Also -------- open_mfdataset @@ -597,6 +604,13 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, ------- xarray.Dataset + Notes + ----- + ``open_mfdataset`` opens files with read-only access. 
When you modify values + of a Dataset, even one linked to files on disk, only the in-memory copy you + are manipulating in xarray is modified: the original file on disk is never + touched. + See Also -------- auto_combine diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 96b42f19555..e7e12ae3da4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1385,8 +1385,9 @@ def transpose(self, *dims): Notes ----- - Although this operation returns a view of this array's data, it is - not lazy -- the data will be fully loaded. + This operation returns a view of this array's data. It is + lazy for dask-backed DataArrays but not for numpy-backed DataArrays + -- the data will be fully loaded. See Also -------- @@ -2437,10 +2438,10 @@ def integrate(self, dim, datetime_unit=None): ---------- dim: str, or a sequence of str Coordinate(s) used for the integration. - datetime_unit - Can be specify the unit if datetime coordinate is used. One of - {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', - 'as'} + datetime_unit: str, optional + Can be used to specify the unit if datetime coordinate is used. + One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', + 'ps', 'fs', 'as'} Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3e6cac1c5b..12c5d139fdc 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2848,8 +2848,9 @@ def transpose(self, *dims): Notes ----- - Although this operation returns a view of each array's data, it - is not lazy -- the data will be fully loaded into memory. + This operation returns a view of each array's data. It is + lazy for dask-backed DataArrays but not for numpy-backed DataArrays + -- the data will be fully loaded into memory. See Also -------- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b675317d83d..433f4a05e1f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1133,8 +1133,8 @@ def transpose(self, *dims): Notes ----- - Although this operation returns a view of this variable's data, it is - not lazy -- the data will be fully loaded. + This operation returns a view of this variable's data. It is + lazy for dask-backed Variables but not for numpy-backed Variables. See Also -------- From 81cedd6e6d111c3ac58655585ffa6945fcc0e39b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 14 Mar 2019 08:59:12 -0700 Subject: [PATCH 02/14] Drop failing tests writing multi-dimensional arrays as attributes (#2810) These aren't valid for netCDF files. 
Fixes GH2803 --- xarray/tests/test_backends.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f610dba1352..c6ddb8fae58 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3444,11 +3444,6 @@ def new_dataset_and_coord_attrs(): with create_tmp_file() as tmp_file: ds.to_netcdf(tmp_file) - ds, attrs = new_dataset_and_attrs() - attrs['test'] = np.arange(12).reshape(3, 4) - with create_tmp_file() as tmp_file: - ds.to_netcdf(tmp_file) - ds, attrs = new_dataset_and_attrs() attrs['test'] = 'This is a string' with create_tmp_file() as tmp_file: @@ -3459,11 +3454,6 @@ def new_dataset_and_coord_attrs(): with create_tmp_file() as tmp_file: ds.to_netcdf(tmp_file) - ds, attrs = new_dataset_and_attrs() - attrs['test'] = np.arange(12).reshape(3, 4) - with create_tmp_file() as tmp_file: - ds.to_netcdf(tmp_file) - @requires_scipy_or_netCDF4 class TestDataArrayToNetCDF(object): From f382fd840dafa5fdd95e66a7ddd15a3d498c1bce Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 14 Mar 2019 21:22:10 -0700 Subject: [PATCH 03/14] Push back finalizing deprecations for 0.12 (#2809) 0.12 will already have a big change in dropping Python 2.7 support. I'd rather wait a bit longer to finalize these deprecations to minimize the impact on users. --- xarray/backends/api.py | 4 ++-- xarray/core/groupby.py | 10 ++++++---- xarray/core/utils.py | 3 ++- xarray/tests/test_dataarray.py | 2 +- xarray/tutorial.py | 9 +++++---- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 36baa9071c0..1f330bbd3a0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -262,8 +262,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, warnings.warn( 'The autoclose argument is no longer used by ' 'xarray.open_dataset() and is now ignored; it will be removed in ' - 'xarray v0.12. If necessary, you can control the maximum number ' - 'of simultaneous open files with ' + 'a future version of xarray. If necessary, you can control the ' + 'maximum number of simultaneous open files with ' 'xarray.set_options(file_cache_maxsize=...).', FutureWarning, stacklevel=2) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1fa1c159fbc..e8e2f1b08d4 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -616,8 +616,9 @@ def reduce(self, func, dim=None, axis=None, if self._obj.ndim > 1: warnings.warn( "Default reduction dimension will be changed to the " - "grouped dimension after xarray 0.12. To silence this " - "warning, pass dim=xarray.ALL_DIMS explicitly.", + "grouped dimension in a future version of xarray. To " + "silence this warning, pass dim=xarray.ALL_DIMS " + "explicitly.", FutureWarning, stacklevel=2) if keep_attrs is None: @@ -731,8 +732,9 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): # the deprecation process. Do not forget to remove _reduce_method warnings.warn( "Default reduction dimension will be changed to the " - "grouped dimension after xarray 0.12. To silence this " - "warning, pass dim=xarray.ALL_DIMS explicitly.", + "grouped dimension in a future version of xarray. 
To " + "silence this warning, pass dim=xarray.ALL_DIMS " + "explicitly.", FutureWarning, stacklevel=2) elif dim is None: dim = self._group_dim diff --git a/xarray/core/utils.py b/xarray/core/utils.py index fd1330a4e1f..349c8f98dc5 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -20,7 +20,8 @@ def _check_inplace(inplace, default=False): inplace = default else: warnings.warn('The inplace argument has been deprecated and will be ' - 'removed in xarray 0.12.0.', FutureWarning, stacklevel=3) + 'removed in a future version of xarray.', + FutureWarning, stacklevel=3) return inplace diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ab05f19dbbe..4975071dad8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2037,7 +2037,7 @@ def test_groupby_warning(self): with pytest.warns(FutureWarning): grouped.sum() - @pytest.mark.skipif(LooseVersion(xr.__version__) < LooseVersion('0.12'), + @pytest.mark.skipif(LooseVersion(xr.__version__) < LooseVersion('0.13'), reason="not to forget the behavior change") def test_groupby_sum_default(self): array = self.make_groupby_example_array() diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 3f92bd9a400..f54cf7b3889 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -91,16 +91,17 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, def load_dataset(*args, **kwargs): """ - `load_dataset` will be removed in version 0.12. The current behavior of - this function can be achived by using `tutorial.open_dataset(...).load()`. + `load_dataset` will be removed a future version of xarray. The current + behavior of this function can be achived by using + `tutorial.open_dataset(...).load()`. See Also -------- open_dataset """ warnings.warn( - "load_dataset` will be removed in xarray version 0.12. The current " - "behavior of this function can be achived by using " + "load_dataset` will be removed in a future version of xarray. The " + "current behavior of this function can be achived by using " "`tutorial.open_dataset(...).load()`.", DeprecationWarning, stacklevel=2) return open_dataset(*args, **kwargs).load() From 225868d232219440b188956531d5764ff4cd1b53 Mon Sep 17 00:00:00 2001 From: Scott Henderson Date: Fri, 15 Mar 2019 17:35:57 -0700 Subject: [PATCH 04/14] enable loading remote hdf5 files (#2782) * attempt at loading remote hdf5 * added a couple tests * rewind bytes after reading header * addressed comments for tests and error message * fixed pep8 formatting * created _get_engine_from_magic_number function, new tests * added description in whats-new * fixed test failure on windows * same error on windows and nix --- doc/whats-new.rst | 8 ++-- xarray/backends/api.py | 77 +++++++++++++++++++++++------------ xarray/tests/__init__.py | 6 +++ xarray/tests/test_backends.py | 50 ++++++++++++++++++++++- 4 files changed, 111 insertions(+), 30 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1135613c911..df68378d8d3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,9 @@ Breaking changes Enhancements ~~~~~~~~~~~~ - +- Added ability to open netcdf4/hdf5 file-like objects with ``open_dataset``. + Requires (h5netcdf>0.7 and h5py>2.9.0). (:issue:`2781`) + By `Scott Henderson `_ - Internal plotting now supports ``cftime.datetime`` objects as time series. 
(:issue:`2164`) By `Julius Busecke `_ and @@ -81,8 +83,8 @@ Enhancements :py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence serialization warnings raised if dates from a standard calendar are found to be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By - `Spencer Clark `_. - + `Spencer Clark `_. + - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`). By `Kevin Squire `_. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1f330bbd3a0..a982c6cd35e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -75,6 +75,34 @@ def _get_default_engine_netcdf(): return engine +def _get_engine_from_magic_number(filename_or_obj): + # check byte header to determine file type + if isinstance(filename_or_obj, bytes): + magic_number = filename_or_obj[:8] + else: + if filename_or_obj.tell() != 0: + raise ValueError("file-like object read/write pointer not at zero " + "please close and reopen, or use a context " + "manager") + magic_number = filename_or_obj.read(8) + filename_or_obj.seek(0) + + if magic_number.startswith(b'CDF'): + engine = 'scipy' + elif magic_number.startswith(b'\211HDF\r\n\032\n'): + engine = 'h5netcdf' + if isinstance(filename_or_obj, bytes): + raise ValueError("can't open netCDF4/HDF5 as bytes " + "try passing a path or file-like object") + else: + if isinstance(filename_or_obj, bytes) and len(filename_or_obj) > 80: + filename_or_obj = filename_or_obj[:80] + b'...' + raise ValueError('{} is not a valid netCDF file ' + 'did you mean to pass a string for a path instead?' + .format(filename_or_obj)) + return engine + + def _get_default_engine(path, allow_remote=False): if allow_remote and is_remote_uri(path): engine = _get_default_engine_remote_uri() @@ -170,8 +198,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, Strings and Path objects are interpreted as a path to a netCDF file or an OpenDAP URL and opened with python-netCDF4, unless the filename ends with .gz, in which case the file is gunzipped and opened with - scipy.io.netcdf (only netCDF3 supported). File-like objects are opened - with scipy.io.netcdf (only netCDF3 supported). + scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like + objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). group : str, optional Path to the netCDF4 group in the given file to open (only works for netCDF4 files). 
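The docstring text above is driven by the ``_get_engine_from_magic_number`` helper added earlier in this diff. A hedged standalone sketch of the same check (the name ``sniff_engine`` is illustrative, not part of the patch)::

    def sniff_engine(path):
        # Peek at the first 8 bytes, as the helper does for file-like objects.
        with open(path, 'rb') as f:
            magic_number = f.read(8)
        if magic_number.startswith(b'CDF'):
            return 'scipy'  # classic netCDF3
        elif magic_number.startswith(b'\211HDF\r\n\032\n'):
            return 'h5netcdf'  # netCDF4/HDF5 signature
        raise ValueError('{} is not a valid netCDF file'.format(path))

For file-like objects the real helper also rewinds with ``seek(0)`` after reading the header ("rewind bytes after reading header" in the commit message), so the downstream backend sees the full stream.
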
@@ -258,6 +286,13 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, -------- open_mfdataset """ + engines = [None, 'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', + 'cfgrib', 'pseudonetcdf'] + if engine not in engines: + raise ValueError('unrecognized engine for open_dataset: {}\n' + 'must be one of: {}' + .format(engine, engines)) + if autoclose is not None: warnings.warn( 'The autoclose argument is no longer used by ' @@ -316,18 +351,9 @@ def maybe_decode_store(store, lock=False): if isinstance(filename_or_obj, backends.AbstractDataStore): store = filename_or_obj - ds = maybe_decode_store(store) - elif isinstance(filename_or_obj, str): - if (isinstance(filename_or_obj, bytes) and - filename_or_obj.startswith(b'\x89HDF')): - raise ValueError('cannot read netCDF4/HDF5 file images') - elif (isinstance(filename_or_obj, bytes) and - filename_or_obj.startswith(b'CDF')): - # netCDF3 file images are handled by scipy - pass - elif isinstance(filename_or_obj, str): - filename_or_obj = _normalize_path(filename_or_obj) + elif isinstance(filename_or_obj, str): + filename_or_obj = _normalize_path(filename_or_obj) if engine is None: engine = _get_default_engine(filename_or_obj, @@ -352,18 +378,19 @@ def maybe_decode_store(store, lock=False): elif engine == 'cfgrib': store = backends.CfGribDataStore( filename_or_obj, lock=lock, **backend_kwargs) - else: - raise ValueError('unrecognized engine for open_dataset: %r' - % engine) - with close_on_error(store): - ds = maybe_decode_store(store) else: - if engine is not None and engine != 'scipy': - raise ValueError('can only read file-like objects with ' - "default engine or engine='scipy'") - # assume filename_or_obj is a file-like object - store = backends.ScipyDataStore(filename_or_obj) + if engine not in [None, 'scipy', 'h5netcdf']: + raise ValueError("can only read bytes or file-like objects " + "with engine='scipy' or 'h5netcdf'") + engine = _get_engine_from_magic_number(filename_or_obj) + if engine == 'scipy': + store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs) + elif engine == 'h5netcdf': + store = backends.H5NetCDFStore(filename_or_obj, group=group, + lock=lock, **backend_kwargs) + + with close_on_error(store): ds = maybe_decode_store(store) # Ensure source filename always stored in dataset object (GH issue #2550) @@ -390,8 +417,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, Strings and Paths are interpreted as a path to a netCDF file or an OpenDAP URL and opened with python-netCDF4, unless the filename ends with .gz, in which case the file is gunzipped and opened with - scipy.io.netcdf (only netCDF3 supported). File-like objects are opened - with scipy.io.netcdf (only netCDF3 supported). + scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like + objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). group : str, optional Path to the netCDF4 group in the given file to open (only works for netCDF4 files). 
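Mirroring the tests added below, a short sketch of the new file-like reading path; ``'example.nc'`` is a placeholder path, and per the whats-new entry this assumes ``h5netcdf>0.7`` and ``h5py>2.9.0``::

    import xarray as xr

    # The read pointer must be at zero, otherwise open_dataset raises ValueError.
    with open('example.nc', 'rb') as f:
        with xr.open_dataset(f, engine='h5netcdf') as ds:
            print(ds)
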
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 281fc662197..4ebcc29a61e 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -77,6 +77,12 @@ def LooseVersion(vstring): has_cfgrib, requires_cfgrib = _importorskip('cfgrib') # some special cases +has_h5netcdf07, requires_h5netcdf07 = _importorskip('h5netcdf', + minversion='0.7') +has_h5py29, requires_h5py29 = _importorskip('h5py', minversion='2.9.0') +has_h5fileobj = has_h5netcdf07 and has_h5py29 +requires_h5fileobj = pytest.mark.skipif( + not has_h5fileobj, reason='requires h5py>2.9.0 & h5netcdf>0.7') has_scipy_or_netCDF4 = has_scipy or has_netCDF4 requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason='requires scipy or netCDF4') diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c6ddb8fae58..a20ba2df229 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -35,7 +35,7 @@ requires_cftime, requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib, requires_pseudonetcdf, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, requires_scipy_or_netCDF4, - requires_zarr) + requires_zarr, requires_h5fileobj) from .test_coding_times import (_STANDARD_CALENDARS, _NON_STANDARD_CALENDARS, _ALL_CALENDARS) from .test_dataset import create_test_data @@ -1770,7 +1770,7 @@ def test_engine(self): open_dataset(tmp_file, engine='foobar') netcdf_bytes = data.to_netcdf() - with raises_regex(ValueError, 'can only read'): + with raises_regex(ValueError, 'unrecognized engine'): open_dataset(BytesIO(netcdf_bytes), engine='foobar') def test_cross_engine_read_write_netcdf3(self): @@ -1955,6 +1955,52 @@ def test_dump_encodings_h5py(self): assert actual.x.encoding['compression_opts'] is None +@requires_h5fileobj +class TestH5NetCDFFileObject(TestH5NetCDFData): + engine = 'h5netcdf' + + def test_open_badbytes(self): + with raises_regex(ValueError, "HDF5 as bytes"): + with open_dataset(b'\211HDF\r\n\032\n', engine='h5netcdf'): + pass + with raises_regex(ValueError, "not a valid netCDF"): + with open_dataset(b'garbage'): + pass + with raises_regex(ValueError, "can only read bytes"): + with open_dataset(b'garbage', engine='netcdf4'): + pass + with raises_regex(ValueError, "not a valid netCDF"): + with open_dataset(BytesIO(b'garbage'), engine='h5netcdf'): + pass + + def test_open_twice(self): + expected = create_test_data() + expected.attrs['foo'] = 'bar' + with raises_regex(ValueError, 'read/write pointer not at zero'): + with create_tmp_file() as tmp_file: + expected.to_netcdf(tmp_file, engine='h5netcdf') + with open(tmp_file, 'rb') as f: + with open_dataset(f, engine='h5netcdf'): + with open_dataset(f, engine='h5netcdf'): + pass + + def test_open_fileobj(self): + # open in-memory datasets instead of local file paths + expected = create_test_data().drop('dim3') + expected.attrs['foo'] = 'bar' + with create_tmp_file() as tmp_file: + expected.to_netcdf(tmp_file, engine='h5netcdf') + + with open(tmp_file, 'rb') as f: + with open_dataset(f, engine='h5netcdf') as actual: + assert_identical(expected, actual) + + f.seek(0) + with BytesIO(f.read()) as bio: + with open_dataset(bio, engine='h5netcdf') as actual: + assert_identical(expected, actual) + + @requires_h5netcdf @requires_dask @pytest.mark.filterwarnings('ignore:deallocating CachingFileManager') From ad977c94eaaa1ad151bb46f2dad319566261c282 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 15 Mar 2019 21:02:04 -0700 Subject: [PATCH 05/14] Release 0.12.0 --- .gitignore 
| 1 + doc/whats-new.rst | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 2a016bb9228..fdf1b12d706 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ pip-log.txt .tox nosetests.xml .cache +.mypy_cache .ropeproject/ .tags* .testmon* diff --git a/doc/whats-new.rst b/doc/whats-new.rst index df68378d8d3..1435b123037 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,8 +15,8 @@ What's New .. _whats-new.0.12.0: -v0.12.0 (unreleased) --------------------- +v0.12.0 (15 March 2019) +----------------------- Breaking changes ~~~~~~~~~~~~~~~~ @@ -24,12 +24,10 @@ Breaking changes - Remove support for Python 2. This is the first version of xarray that is Python 3 only. (:issue:`1876`). By `Joe Hamman `_. -- The `compat` argument to `Dataset` and the `encoding` argument to - `DataArray` are deprecated and will be removed in a future release. +- The ``compat`` argument to ``Dataset`` and the ``encoding`` argument to + ``DataArray`` are deprecated and will be removed in a future release. (:issue:`1188`) By `Maximilian Roos `_. -- `cyordereddict` is no longer used as an optional dependency (:issue:`2744`). - By `Joe Hamman `_. Enhancements ~~~~~~~~~~~~ @@ -99,14 +97,14 @@ Bug fixes of the original time coordinate are now filled with NaN (:issue:`2197`). By `Spencer Clark `_. - Line plots with the ``x`` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. - (:issue:`27251). By `Tom Nicholas `_. + (:issue:`27251`). By `Tom Nicholas `_. - Subtracting a scalar ``cftime.datetime`` object from a :py:class:`CFTimeIndex` now results in a :py:class:`pandas.TimedeltaIndex` instead of raising a ``TypeError`` (:issue:`2671`). By `Spencer Clark `_. - backend_kwargs are no longer ignored when using open_dataset with pynio engine (:issue:'2380') - By 'Jonathan Joyce '_. + By `Jonathan Joyce `_. - Fix ``open_rasterio`` creating a WKT CRS instead of PROJ.4 with ``rasterio`` 1.0.14+ (:issue:`2715`). By `David Hoese `_. From 0c9152c0413cd36df8f744a3b0e9c026d37d2b05 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 15 Mar 2019 21:16:05 -0700 Subject: [PATCH 06/14] Add whats-new for 0.12.1 --- doc/whats-new.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1435b123037..a596ab18c01 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,6 +13,19 @@ What's New import xarray as xr np.random.seed(123456) +.. _whats-new.0.12.1: + +v0.12.1 (unreleased) +-------------------- + +Enhancements +~~~~~~~~~~~~ + + +Bug fixes +~~~~~~~~~ + + .. _whats-new.0.12.0: v0.12.0 (15 March 2019) From a5ca64ac5988f0c9c9c6b741a5de16e81b90cad5 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 15 Mar 2019 21:28:13 -0700 Subject: [PATCH 07/14] Rework whats-new for 0.12 --- doc/whats-new.rst | 89 +++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a596ab18c01..3de610b3046 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,37 +31,70 @@ Bug fixes v0.12.0 (15 March 2019) ----------------------- -Breaking changes -~~~~~~~~~~~~~~~~ +Highlights include: + +- Removed support for Python 2. This is the first version of xarray that is + Python 3 only! +- New :py:meth:`~xarray.DataArray.coarsen` and + :py:meth:`~xarray.DataArray.integrate` methods. See :ref:`comput.coarsen` + and :ref:`compute.using_coordinates` for details. +- Many improvements to cftime support. 
See below for details. + +Deprecations +~~~~~~~~~~~~ -- Remove support for Python 2. This is the first version of xarray that is - Python 3 only. (:issue:`1876`). - By `Joe Hamman `_. - The ``compat`` argument to ``Dataset`` and the ``encoding`` argument to ``DataArray`` are deprecated and will be removed in a future release. (:issue:`1188`) By `Maximilian Roos `_. -Enhancements -~~~~~~~~~~~~ -- Added ability to open netcdf4/hdf5 file-like objects with ``open_dataset``. - Requires (h5netcdf>0.7 and h5py>2.9.0). (:issue:`2781`) - By `Scott Henderson `_ +cftime related enhancements +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Resampling of standard and non-standard calendars indexed by + :py:class:`~xarray.CFTimeIndex` is now possible. (:issue:`2191`). + By `Jwen Fai Low `_ and + `Spencer Clark `_. + +- Taking the mean of arrays of :py:class:`cftime.datetime` objects, and + by extension, use of :py:meth:`~xarray.DataArray.coarsen` with + :py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark + `_. + - Internal plotting now supports ``cftime.datetime`` objects as time series. (:issue:`2164`) By `Julius Busecke `_ and `Spencer Clark `_. + +- :py:meth:`~xarray.cftime_range` now supports QuarterBegin and QuarterEnd offsets (:issue:`2663`). + By `Jwen Fai Low `_ + +- :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which + can be used to require that ``cftime.datetime`` objects are always used, or + never used when decoding dates encoded with a standard calendar. This can be + used to ensure consistent date types are returned when using + :py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence + serialization warnings raised if dates from a standard calendar are found to + be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By + `Spencer Clark `_. + +- :py:meth:`pandas.Series.dropna` is now supported for a + :py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex` + (:issue:`2688`). By `Spencer Clark `_. + +Other enhancements +~~~~~~~~~~~~~~~~~~ + +- Added ability to open netcdf4/hdf5 file-like objects with ``open_dataset``. + Requires (h5netcdf>0.7 and h5py>2.9.0). (:issue:`2781`) + By `Scott Henderson `_ - Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) By `Ryan Abernathey `_ -- :py:meth:`~xarray.DataArray.coarsen` and - :py:meth:`~xarray.Dataset.coarsen` are newly added. +- :py:meth:`DataArray.coarsen` and + :py:meth:`Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. (:issue:`2525`) By `Keisuke Fujii `_. -- Taking the mean of arrays of :py:class:`cftime.datetime` objects, and - by extension, use of :py:meth:`~xarray.DataArray.coarsen` with - :py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark - `_. - Upsampling an array via interpolation with resample is now dask-compatible, as long as the array is not chunked along the resampling dimension. By `Spencer Clark `_. @@ -70,32 +103,14 @@ Enhancements report showing what exactly differs between the two objects (dimensions / coordinates / variables / attributes) (:issue:`1507`). By `Benoit Bovy `_. -- Resampling of standard and non-standard calendars indexed by - :py:class:`~xarray.CFTimeIndex` is now possible. (:issue:`2191`). - By `Jwen Fai Low `_ and - `Spencer Clark `_. - Add ``tolerance`` option to ``resample()`` methods ``bfill``, ``pad``, ``nearest``. (:issue:`2695`) By `Hauke Schulz `_. -- :py:meth:`~xarray.DataArray.integrate` and - :py:meth:`~xarray.Dataset.integrate` are newly added. 
- See :ref:`_compute.using_coordinates` for the detail. +- :py:meth:`DataArray.integrate` and + :py:meth:`Dataset.integrate` are newly added. + See :ref:`compute.using_coordinates` for the detail. (:issue:`1332`) By `Keisuke Fujii `_. -- :py:meth:`pandas.Series.dropna` is now supported for a - :py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex` - (:issue:`2688`). By `Spencer Clark `_. -- :py:meth:`~xarray.cftime_range` now supports QuarterBegin and QuarterEnd offsets (:issue:`2663`). - By `Jwen Fai Low `_ -- :py:meth:`~xarray.open_dataset` now accepts a ``use_cftime`` argument, which - can be used to require that ``cftime.datetime`` objects are always used, or - never used when decoding dates encoded with a standard calendar. This can be - used to ensure consistent date types are returned when using - :py:meth:`~xarray.open_mfdataset` (:issue:`1263`) and/or to silence - serialization warnings raised if dates from a standard calendar are found to - be outside the :py:class:`pandas.Timestamp`-valid range (:issue:`2754`). By - `Spencer Clark `_. - - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`). By `Kevin Squire `_. From 55b8fe92473724ae03c93777e21fc7c36074188f Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 19 Mar 2019 21:43:26 -0700 Subject: [PATCH 08/14] DOC: Update donation links --- README.rst | 2 +- doc/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index f69f7d95c31..6dbf774549d 100644 --- a/README.rst +++ b/README.rst @@ -97,7 +97,7 @@ to supporting the open source scientific computing community. If you like Xarray and want to support our mission, please consider making a donation_ to support our efforts. -.. _donation: https://www.flipcause.com/secure/cause_pdetails/NDE2NTU= +.. _donation: https://numfocus.salsalabs.org/donate-to-xarray/ History ------- diff --git a/doc/index.rst b/doc/index.rst index 1d3bb110ddb..002bd102e12 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -140,7 +140,7 @@ to supporting the open source scientific computing community. If you like Xarray and want to support our mission, please consider making a donation_ to support our efforts. -.. _donation: https://www.flipcause.com/secure/cause_pdetails/NDE2NTU= +.. _donation: https://numfocus.salsalabs.org/donate-to-xarray/ History From 164d20abb6ffc35eb3f314ce7fb5b9600cf9de3f Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 20 Mar 2019 12:07:58 -0700 Subject: [PATCH 09/14] DOC: remove outdated warning (#2818) --- doc/data-structures.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/doc/data-structures.rst b/doc/data-structures.rst index a8887471ec7..5be1f7b4262 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -353,13 +353,6 @@ setting) variables and attributes: This is particularly useful in an exploratory context, because you can tab-complete these variable names with tools like IPython. -.. warning:: - - We are changing the behavior of iterating over a Dataset the next major - release of xarray, to only include data variables instead of both data - variables and coordinates. In the meantime, prefer iterating over - ``ds.data_vars`` or ``ds.coords``. 
- Dictionary like methods ~~~~~~~~~~~~~~~~~~~~~~~ From 8126d3e623667930b1df43c2d936a0ba52a6ca19 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 21 Mar 2019 21:12:51 -0400 Subject: [PATCH 10/14] use == to compare strings; `is` relies on interning (#2832) --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 433f4a05e1f..93424474d05 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1597,7 +1597,7 @@ def rank(self, dim, pct=False): "prior to calling this method.") axis = self.get_axis_num(dim) - func = bn.nanrankdata if self.dtype.kind is 'f' else bn.rankdata + func = bn.nanrankdata if self.dtype.kind == 'f' else bn.rankdata ranked = func(self.data, axis=axis) if pct: count = np.sum(~np.isnan(self.data), axis=axis, keepdims=True) From 742ed3984f437982057fd46ecfb0bce214563cb8 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Fri, 22 Mar 2019 00:37:24 -0400 Subject: [PATCH 11/14] Enable python 3.5.0-3.5.2 (#2831) * bump minimum python version to 3.5.3 * Revert "bump minimum python version to 3.5.3" This reverts commit 77553e17cf2126868d33616c349504e64c76e7b4. * guard typing import block * attempt to set patch version to 3.5.0 * "Type" also needs a guard * move 3.5.0 to py35-min * guiard all TYPE_CHECKING with new global * missing import * when two lines become one * formatting * two steps forward, one step back * Consolidate variables * lint --- ci/requirements-py35-min.yml | 2 +- xarray/coding/cftime_offsets.py | 6 +++++- xarray/core/computation.py | 7 ++++--- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 19 +++++++++---------- xarray/core/merge.py | 9 +++++---- xarray/core/pycompat.py | 6 ++++++ xarray/core/variable.py | 13 +++++++++---- 8 files changed, 40 insertions(+), 24 deletions(-) diff --git a/ci/requirements-py35-min.yml b/ci/requirements-py35-min.yml index 1f41d0d9dc1..b140d81b959 100644 --- a/ci/requirements-py35-min.yml +++ b/ci/requirements-py35-min.yml @@ -1,6 +1,6 @@ name: test_env dependencies: - - python=3.5 + - python=3.5.0 - pytest - flake8 - mock diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a74c735224b..d724554b458 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -41,15 +41,19 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import re +import typing from datetime import timedelta from functools import partial -from typing import ClassVar, Optional import numpy as np +from ..core.pycompat import TYPE_CHECKING from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso from .times import format_cftime_datetime +if TYPE_CHECKING: + from typing import ClassVar, Optional + def get_date_type(calendar): """Return the cftime date type for a given calendar name.""" diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f9fd9022de9..451d95ee542 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -4,21 +4,22 @@ import functools import itertools import operator +import typing from collections import Counter, OrderedDict from distutils.version import LooseVersion from typing import ( AbstractSet, Any, Callable, Iterable, List, Mapping, Optional, Sequence, - Tuple, TYPE_CHECKING, Union, -) + Tuple, Union) import numpy as np from . 
import duck_array_ops, utils from .alignment import deep_align from .merge import expand_and_merge_variables -from .pycompat import dask_array_type +from .pycompat import TYPE_CHECKING, dask_array_type from .utils import is_dict_like from .variable import Variable + if TYPE_CHECKING: from .dataset import Dataset diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e7e12ae3da4..7cd856db5b4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd +from ..plot.plot import _PlotMethods from . import ( computation, dtypes, groupby, indexing, ops, resample, rolling, utils) -from ..plot.plot import _PlotMethods from .accessors import DatetimeAccessor from .alignment import align, reindex_like_indexers from .common import AbstractArray, DataWithCoords diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 12c5d139fdc..3463ada600b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,45 +1,44 @@ import copy import functools import sys +import typing import warnings from collections import OrderedDict, defaultdict from collections.abc import Mapping from distutils.version import LooseVersion from numbers import Number from typing import ( - Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar, TYPE_CHECKING, - Union, -) + Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar, Union) import numpy as np import pandas as pd import xarray as xr +from ..coding.cftimeindex import _parse_array_of_cftime_strings from . import ( alignment, dtypes, duck_array_ops, formatting, groupby, indexing, ops, pdcompat, resample, rolling, utils) -from ..coding.cftimeindex import _parse_array_of_cftime_strings from .alignment import align from .common import ( ALL_DIMS, DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects) from .coordinates import ( DatasetCoordinates, LevelCoordinatesSource, assert_coordinate_consistent, - remap_label_indexers, -) + remap_label_indexers) from .duck_array_ops import datetime_to_numeric from .indexes import Indexes, default_indexes, isel_variable_and_index from .merge import ( dataset_merge_method, dataset_update_method, merge_data_and_coords, merge_variables) from .options import OPTIONS, _get_keep_attrs -from .pycompat import dask_array_type +from .pycompat import TYPE_CHECKING, dask_array_type from .utils import ( - Frozen, SortedKeysDict, _check_inplace, - decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution, - hashable, maybe_wrap_array) + Frozen, SortedKeysDict, _check_inplace, decode_numpy_dict_values, + either_dict_or_kwargs, ensure_us_time_resolution, hashable, is_dict_like, + maybe_wrap_array) from .variable import IndexVariable, Variable, as_variable, broadcast_variables + if TYPE_CHECKING: from .dataarray import DataArray diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 3039eecb2f8..363fdfc2337 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -1,18 +1,19 @@ +import typing from collections import OrderedDict - -from typing import ( - Any, Dict, List, Mapping, Optional, Set, Tuple, TYPE_CHECKING, Union, -) +from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union import pandas as pd from .alignment import deep_align +from .pycompat import TYPE_CHECKING from .utils import Frozen from .variable import ( Variable, as_variable, assert_unique_multiindex_level_names) + if TYPE_CHECKING: from .dataset import Dataset + PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel) _VALID_COMPAT 
= Frozen({'identical': 0, diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index bd2075fa300..0df0e727303 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,4 +1,6 @@ # flake8: noqa +import sys +import typing import numpy as np @@ -10,3 +12,7 @@ dask_array_type = (dask.array.Array,) except ImportError: # pragma: no cover dask_array_type = () + +# Ensure we have some more recent additions to the typing module. +# Note that TYPE_CHECKING itself is not available on Python 3.5.1. +TYPE_CHECKING = sys.version >= '3.5.3' and typing.TYPE_CHECKING diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 93424474d05..d6b64e7d458 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,8 +1,8 @@ import functools import itertools +import typing from collections import OrderedDict, defaultdict from datetime import timedelta -from typing import Tuple, Type, Union import numpy as np import pandas as pd @@ -15,9 +15,14 @@ BasicIndexer, OuterIndexer, PandasIndexAdapter, VectorizedIndexer, as_indexable) from .options import _get_keep_attrs -from .pycompat import dask_array_type, integer_types -from .utils import (OrderedSet, either_dict_or_kwargs, - decode_numpy_dict_values, ensure_us_time_resolution) +from .pycompat import TYPE_CHECKING, dask_array_type, integer_types +from .utils import ( + OrderedSet, decode_numpy_dict_values, either_dict_or_kwargs, + ensure_us_time_resolution) + +if TYPE_CHECKING: + from typing import Tuple, Type, Union + try: import dask.array as da From 948d98476524e3cf399ce6131dcfc44bf6eebb6e Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Sun, 24 Mar 2019 14:11:51 -0700 Subject: [PATCH 12/14] Remove py37 collections.abc deprecation warning from lru_cache module. (#2849) Looks like this occurrence was missing in #2574. --- xarray/backends/lru_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/lru_cache.py b/xarray/backends/lru_cache.py index e407c384aaf..4be6efea7c0 100644 --- a/xarray/backends/lru_cache.py +++ b/xarray/backends/lru_cache.py @@ -1,8 +1,9 @@ import collections +import collections.abc import threading -class LRUCache(collections.MutableMapping): +class LRUCache(collections.abc.MutableMapping): """Thread-safe LRUCache based on an OrderedDict. All dict operations (__getitem__, __setitem__, __contains__) update the From 72e6208e59db14d0a707ae1fd401104da40d91b3 Mon Sep 17 00:00:00 2001 From: Spencer Mathews Date: Sun, 24 Mar 2019 19:30:00 -0700 Subject: [PATCH 13/14] Fix formatting of engine parameter in docstrings (#2846) * Fix formatting of engine parameter in docstrings * Fix lint pep8 line length error --- xarray/backends/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a982c6cd35e..afb69f6e9e9 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -230,7 +230,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, decode_coords : bool, optional If True, decode the 'coordinates' attribute to identify coordinates in the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib', + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib', \ 'pseudonetcdf'}, optional Engine to use when reading files. 
If not provided, the default engine is chosen based on available dependencies, with a preference for @@ -445,7 +445,7 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, decode_coords : bool, optional If True, decode the 'coordinates' attribute to identify coordinates in the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib'}, + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib'}, \ optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for @@ -584,7 +584,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in ``ds.encoding['source']``. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib'}, + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'cfgrib'}, \ optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for From 89dddc1f5892178abf7375e14adfd5b729b71037 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 24 Mar 2019 19:30:28 -0700 Subject: [PATCH 14/14] Fix indexes created by Dataset.swap_dims (#2845) Fixes GH2842 --- doc/whats-new.rst | 3 +++ xarray/core/dataset.py | 14 +++++--------- xarray/tests/test_dataset.py | 5 +---- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3de610b3046..6cf2720a033 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Enhancements Bug fixes ~~~~~~~~~ +- ``swap_dims`` would create incorrect ``indexes`` (:issue:`2842`). + By `Stephan Hoyer `_. + .. _whats-new.0.12.0: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3463ada600b..3bb54e80456 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2307,24 +2307,20 @@ def swap_dims(self, dims_dict, inplace=None): coord_names.update(dims_dict.values()) variables = OrderedDict() + indexes = OrderedDict() for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: var = v.to_index_variable() + if k in self.indexes: + indexes[k] = self.indexes[k] + else: + indexes[k] = var.to_index() else: var = v.to_base_variable() var.dims = dims variables[k] = var - indexes = OrderedDict() - for k, v in self.indexes.items(): - if k in dims_dict: - new_name = dims_dict[k] - new_index = variables[k].to_index() - indexes[new_name] = new_index - else: - indexes[k] = v - return self._replace_with_new_dims(variables, coord_names, indexes=indexes, inplace=inplace) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8e8c6c4b419..777a8e84a3f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2002,14 +2002,11 @@ def test_swap_dims(self): assert_identical(expected, actual) assert isinstance(actual.variables['y'], IndexVariable) assert isinstance(actual.variables['x'], Variable) + assert actual.indexes['y'].equals(pd.Index(list('abc'))) roundtripped = actual.swap_dims({'y': 'x'}) assert_identical(original.set_coords('y'), roundtripped) - actual = original.copy() - actual = actual.swap_dims({'x': 'y'}) - assert_identical(expected, actual) - with raises_regex(ValueError, 'cannot swap'): original.swap_dims({'y': 'x'}) with raises_regex(ValueError, 'replacement dimension'):
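
To close, a hedged sketch of the ``swap_dims`` behavior this last patch repairs (the dataset here is illustrative, not the test fixture): the index of the new dimension should reflect the coordinate's values, as the updated test asserts::

    import numpy as np
    import pandas as pd
    import xarray as xr

    ds = xr.Dataset({'y': ('x', list('abc')), 'data': ('x', np.arange(3))})
    swapped = ds.swap_dims({'x': 'y'})
    # With the fix, the new index matches the 'y' coordinate values.
    assert swapped.indexes['y'].equals(pd.Index(list('abc')))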