
Commit 8f0d9e5

crusaderky authored and shoyer committed
WIP: more annotations (pydata#3090)
* Typing hints for Dataset.to_netcdf
* type annotations
* poke codecov
1 parent ae69079 commit 8f0d9e5

File tree

3 files changed: +109 -49 lines changed


xarray/backends/api.py

Lines changed: 42 additions & 18 deletions
```diff
@@ -4,21 +4,29 @@
 from io import BytesIO
 from numbers import Number
 from pathlib import Path
-import re
+from typing import Callable, Dict, Hashable, Iterable, Mapping, Tuple, Union
 
 import numpy as np
-import pandas as pd
 
 from .. import Dataset, DataArray, backends, conventions, coding
 from ..core import indexing
 from .. import auto_combine
-from ..core.combine import (combine_by_coords, _nested_combine,
-                            _infer_concat_order_from_positions)
+from ..core.combine import (
+    combine_by_coords,
+    _nested_combine,
+    _infer_concat_order_from_positions
+)
+from ..core.pycompat import TYPE_CHECKING
 from ..core.utils import close_on_error, is_grib_path, is_remote_uri
-from ..core.variable import Variable
-from .common import ArrayWriter
+from .common import ArrayWriter, AbstractDataStore
 from .locks import _get_scheduler
-from ..coding.variables import safe_setitem, unpack_for_encoding
+
+if TYPE_CHECKING:
+    try:
+        from dask.delayed import Delayed
+    except ImportError:
+        Delayed = None
+
 
 DATAARRAY_NAME = '__xarray_dataarray_name__'
 DATAARRAY_VARIABLE = '__xarray_dataarray_variable__'
```
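The `if TYPE_CHECKING:` block makes `Delayed` visible to the type checker without turning dask into a hard runtime dependency; the annotations below then name it as the string `'Delayed'`. A minimal sketch of the same pattern, using `typing.TYPE_CHECKING` directly instead of the `..core.pycompat` shim, with a hypothetical `maybe_lazy` function:

```python
# Sketch of the TYPE_CHECKING guard used above. The optional dependency
# is imported only while a type checker runs; at runtime the name does
# not exist, so annotations must spell it as a string.
from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    from dask.delayed import Delayed  # evaluated by mypy only


def maybe_lazy(compute: bool) -> Union[int, 'Delayed']:
    """Return a value eagerly, or a dask Delayed stand-in for it."""
    if compute:
        return 42
    import dask  # imported lazily, keeping dask optional at runtime
    return dask.delayed(lambda: 42)()
```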
```diff
@@ -406,7 +414,7 @@ def maybe_decode_store(store, lock=False):
     if isinstance(filename_or_obj, Path):
         filename_or_obj = str(filename_or_obj)
 
-    if isinstance(filename_or_obj, backends.AbstractDataStore):
+    if isinstance(filename_or_obj, AbstractDataStore):
         store = filename_or_obj
 
     elif isinstance(filename_or_obj, str):
@@ -805,14 +813,25 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
     return combined
 
 
-WRITEABLE_STORES = {'netcdf4': backends.NetCDF4DataStore.open,
-                    'scipy': backends.ScipyDataStore,
-                    'h5netcdf': backends.H5NetCDFStore}
-
-
-def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None,
-              engine=None, encoding=None, unlimited_dims=None, compute=True,
-              multifile=False):
+WRITEABLE_STORES = {
+    'netcdf4': backends.NetCDF4DataStore.open,
+    'scipy': backends.ScipyDataStore,
+    'h5netcdf': backends.H5NetCDFStore
+}  # type: Dict[str, Callable]
+
+
+def to_netcdf(
+    dataset: Dataset,
+    path_or_file=None,
+    mode: str = 'w',
+    format: str = None,
+    group: str = None,
+    engine: str = None,
+    encoding: Mapping = None,
+    unlimited_dims: Iterable[Hashable] = None,
+    compute: bool = True,
+    multifile: bool = False
+) -> Union[Tuple[ArrayWriter, AbstractDataStore], bytes, 'Delayed', None]:
     """This function creates an appropriate datastore for writing a dataset to
     disk as a netCDF file
 
```
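The four-way `Union` in the new return annotation tracks the four call modes of `to_netcdf`: `multifile=True` returns the `(ArrayWriter, AbstractDataStore)` pair used by `save_mfdataset`, a missing target serializes to in-memory `bytes`, `compute=False` returns a dask `'Delayed'`, and an ordinary write returns `None`. A hedged sketch of how this surfaces through the public `Dataset.to_netcdf` wrapper (assuming a netCDF backend such as scipy is installed):

```python
import xarray as xr

ds = xr.Dataset({'a': ('x', [1.0, 2.0, 3.0])})

ds.to_netcdf('out.nc')   # ordinary write to disk         -> None
raw = ds.to_netcdf()     # no target: serialize in memory -> bytes
assert isinstance(raw, bytes)
# compute=False yields a dask 'Delayed' instead (see the last hunk of
# this file); the (ArrayWriter, AbstractDataStore) pair is internal and
# only returned for multifile=True, on behalf of save_mfdataset.
```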
```diff
@@ -872,8 +891,12 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None,
 
     if unlimited_dims is None:
         unlimited_dims = dataset.encoding.get('unlimited_dims', None)
-    if isinstance(unlimited_dims, str):
-        unlimited_dims = [unlimited_dims]
+    if unlimited_dims is not None:
+        if (isinstance(unlimited_dims, str)
+                or not isinstance(unlimited_dims, Iterable)):
+            unlimited_dims = [unlimited_dims]
+        else:
+            unlimited_dims = list(unlimited_dims)
 
     writer = ArrayWriter()
 
```
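The widened branch normalizes `unlimited_dims` to a list: a `str` is iterable but names a single dimension, and a bare non-iterable hashable gets wrapped the same way. A standalone sketch of the same logic (the helper name is illustrative, not part of xarray; note that `typing.Iterable` supports `isinstance` checks, which the new code relies on):

```python
from typing import Hashable, Iterable, List


def normalize_unlimited_dims(unlimited_dims) -> List[Hashable]:
    # A str is iterable but denotes one dimension name; a non-iterable
    # hashable (e.g. an int used as a dimension name) is wrapped too.
    if (isinstance(unlimited_dims, str)
            or not isinstance(unlimited_dims, Iterable)):
        return [unlimited_dims]
    return list(unlimited_dims)


assert normalize_unlimited_dims('time') == ['time']
assert normalize_unlimited_dims(0) == [0]
assert normalize_unlimited_dims(('x', 'y')) == ['x', 'y']
```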
```diff
@@ -902,6 +925,7 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None,
     if not compute:
         import dask
         return dask.delayed(_finalize_store)(writes, store)
+    return None
 
 
 def dump_to_store(dataset, store, writer=None, encoder=None,
```
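Now that the function is annotated, the explicit trailing `return None` makes the final code path visible instead of falling off the end. The `compute=False` branch wraps both the array writes and the store finalization in a single `Delayed`; a hedged usage sketch (assuming dask and a netCDF backend are installed):

```python
import xarray as xr

ds = xr.Dataset({'a': ('x', [1.0, 2.0, 3.0])})

# Nothing is written yet: the writes plus the final flush/close of the
# store are bundled into one dask Delayed.
delayed = ds.to_netcdf('lazy.nc', compute=False)

# ... schedule other deferred work here ...

delayed.compute()  # the file is actually written and closed here
```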

xarray/core/dataarray.py

Lines changed: 16 additions & 10 deletions
```diff
@@ -2,7 +2,7 @@
 import sys
 import warnings
 from collections import OrderedDict
-from distutils.version import LooseVersion
+from numbers import Number
 from typing import (Any, Callable, Dict, Hashable, Iterable, List, Mapping,
                     Optional, Sequence, Tuple, Union, cast)
 
@@ -871,13 +871,19 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
         """
         return self.variable.chunks
 
-    def chunk(self, chunks: Union[
-            None, int, Tuple[int, ...], Tuple[Tuple[int, ...], ...],
-            Mapping[Hashable, Union[None, int, Tuple[int, ...]]],
-    ] = None,
-            name_prefix: str = 'xarray-',
-            token: Optional[str] = None,
-            lock: bool = False) -> 'DataArray':
+    def chunk(
+        self,
+        chunks: Union[
+            None,
+            Number,
+            Tuple[Number, ...],
+            Tuple[Tuple[Number, ...], ...],
+            Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]],
+        ] = None,
+        name_prefix: str = 'xarray-',
+        token: Optional[str] = None,
+        lock: bool = False
+    ) -> 'DataArray':
         """Coerce this array's data into a dask arrays with the given chunks.
 
         If this variable is a non-dask array, it will be converted to dask
```
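The rewritten signature enumerates every accepted `chunks` form, now typed with `numbers.Number` instead of plain `int`. A short sketch of the accepted forms (dask must be installed; the array is illustrative):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((4, 6)), dims=('x', 'y'))

da.chunk()                  # None: one chunk spanning the array
da.chunk(2)                 # a single Number for every dimension
da.chunk((2, 3))            # a tuple aligned with da.dims
da.chunk(((2, 2), (3, 3)))  # explicit block sizes per dimension
da.chunk({'x': 2, 'y': 3})  # a mapping from dimension name to size
```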
```diff
@@ -890,7 +896,7 @@ def chunk(self, chunks: Union[
 
         Parameters
         ----------
-        chunks : int, tuple or dict, optional
+        chunks : int, tuple or mapping, optional
             Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
             ``{'x': 5, 'y': 5}``.
         name_prefix : str, optional
@@ -905,7 +911,7 @@ def chunk(self, chunks: Union[
         -------
         chunked : xarray.DataArray
         """
-        if isinstance(chunks, (list, tuple)):
+        if isinstance(chunks, (tuple, list)):
             chunks = dict(zip(self.dims, chunks))
 
         ds = self._to_temp_dataset().chunk(chunks, name_prefix=name_prefix,
```
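Swapping `(list, tuple)` for `(tuple, list)` is purely cosmetic; the branch itself pairs positional chunk sizes with dimension names before delegating to `Dataset.chunk`. What the conversion does, in isolation:

```python
# Positional chunks are zipped onto the dimension names, so a tuple or
# list argument becomes the equivalent mapping form.
dims = ('x', 'y')
chunks = (2, 3)
assert dict(zip(dims, chunks)) == {'x': 2, 'y': 3}
```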

xarray/core/dataset.py

Lines changed: 51 additions & 21 deletions
```diff
@@ -5,8 +5,10 @@
 from collections import OrderedDict, defaultdict
 from distutils.version import LooseVersion
 from numbers import Number
+from pathlib import Path
 from typing import (Any, Dict, Hashable, Iterable, Iterator, List,
-                    Mapping, Optional, Sequence, Set, Tuple, Union, cast)
+                    Mapping, MutableMapping, Optional, Sequence, Set, Tuple,
+                    Union, cast)
 
 import numpy as np
 import pandas as pd
@@ -45,8 +47,12 @@
     pass
 
 if TYPE_CHECKING:
-    from ..backends import AbstractDataStore
+    from ..backends import AbstractDataStore, ZarrStore
     from .dataarray import DataArray
+    try:
+        from dask.delayed import Delayed
+    except ImportError:
+        Delayed = None
 
 
 # list of attributes of pd.DatetimeIndex that are ndarrays of time info
```
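`ZarrStore` and `Delayed` exist only while the type checker runs, so the annotations added further down must quote them as forward references, and the `Delayed = None` fallback keeps checking working when dask is not installed. A minimal sketch of the pattern with a hypothetical stub function:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Never executed at runtime, so the import cost is zero and the
    # dependency stays optional.
    from xarray.backends import ZarrStore


def open_store(path: str) -> 'ZarrStore':
    # The quoted annotation is a forward reference: ZarrStore is
    # undefined at runtime, but mypy resolves the string lazily.
    raise NotImplementedError  # illustrative stub only
```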
```diff
@@ -1309,9 +1315,17 @@ def dump_to_store(self, store: 'AbstractDataStore', **kwargs) -> None:
         # with to_netcdf()
         dump_to_store(self, store, **kwargs)
 
-    def to_netcdf(self, path=None, mode='w', format=None, group=None,
-                  engine=None, encoding=None, unlimited_dims=None,
-                  compute=True):
+    def to_netcdf(
+        self,
+        path=None,
+        mode: str = 'w',
+        format: str = None,
+        group: str = None,
+        engine: str = None,
+        encoding: Mapping = None,
+        unlimited_dims: Iterable[Hashable] = None,
+        compute: bool = True,
+    ) -> Union[bytes, 'Delayed', None]:
         """Write dataset contents to a netCDF file.
 
         Parameters
```
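`Dataset.to_netcdf` does not expose `multifile`, so its return type narrows to `Union[bytes, 'Delayed', None]`. A hedged round-trip sketch for the `bytes` mode (assuming the scipy backend, which handles in-memory netCDF files):

```python
from io import BytesIO

import xarray as xr

ds = xr.Dataset({'a': ('x', [1, 2, 3])})

raw = ds.to_netcdf()  # no path given: the whole file comes back as bytes
restored = xr.open_dataset(BytesIO(raw))
assert (restored['a'].values == [1, 2, 3]).all()
```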
```diff
@@ -1366,7 +1380,7 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None,
             This allows using any compression plugin installed in the HDF5
             library, e.g. LZF.
 
-        unlimited_dims : sequence of str, optional
+        unlimited_dims : iterable of hashable, optional
             Dimension(s) that should be serialized as unlimited dimensions.
             By default, no dimensions are treated as unlimited dimensions.
             Note that unlimited_dims may also be set via
@@ -1383,9 +1397,17 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None,
                          unlimited_dims=unlimited_dims,
                          compute=compute)
 
-    def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,
-                encoding=None, compute=True, consolidated=False,
-                append_dim=None):
+    def to_zarr(
+        self,
+        store: Union[MutableMapping, str, Path] = None,
+        mode: str = 'w-',
+        synchronizer=None,
+        group: str = None,
+        encoding: Mapping = None,
+        compute: bool = True,
+        consolidated: bool = False,
+        append_dim: Hashable = None
+    ) -> 'ZarrStore':
         """Write dataset contents to a zarr group.
 
         .. note:: Experimental
```
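The new `store` annotation documents that a `MutableMapping`, a `str` path, or a `pathlib.Path` are all accepted, matching the docstring fix in the next hunk. A hedged sketch (the zarr package must be installed; paths are illustrative):

```python
from pathlib import Path

import xarray as xr

ds = xr.Dataset({'a': ('x', [1, 2, 3])})

ds.to_zarr({})                               # any MutableMapping, e.g. a dict
ds.to_zarr('example.zarr', mode='w')         # a directory path as str...
ds.to_zarr(Path('example2.zarr'), mode='w')  # ...or as pathlib.Path
```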
```diff
@@ -1394,15 +1416,15 @@ def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,
 
         Parameters
         ----------
-        store : MutableMapping or str, optional
+        store : MutableMapping, str or Path, optional
             Store or path to directory in file system.
         mode : {'w', 'w-', 'a'}
             Persistence mode: 'w' means create (overwrite if exists);
             'w-' means create (fail if exists);
             'a' means append (create if does not exist).
         synchronizer : object, optional
             Array synchronizer
-        group : str, obtional
+        group : str, optional
             Group path. (a.k.a. `path` in zarr terminology.)
         encoding : dict, optional
             Nested dictionary with variable names as keys and dictionaries of
@@ -1414,7 +1436,7 @@ def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,
         consolidated: bool, optional
             If True, apply zarr's `consolidate_metadata` function to the store
             after writing.
-        append_dim: str, optional
+        append_dim: hashable, optional
             If mode='a', the dimension on which the data will be appended.
 
         References
@@ -1432,10 +1454,10 @@ def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,
                        group=group, encoding=encoding, compute=compute,
                        consolidated=consolidated, append_dim=append_dim)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return formatting.dataset_repr(self)
 
-    def info(self, buf=None):
+    def info(self, buf=None) -> None:
         """
         Concise summary of a Dataset variables and attributes.
 
@@ -1448,7 +1470,6 @@ def info(self, buf=None):
         pandas.DataFrame.assign
         ncdump: netCDF's ncdump
         """
-
         if buf is None:  # pragma: no cover
             buf = sys.stdout
 
@@ -1473,11 +1494,11 @@ def info(self, buf=None):
         buf.write('\n'.join(lines))
 
     @property
-    def chunks(self):
+    def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         """Block dimensions for this dataset's data or None if it's not a dask
         array.
         """
-        chunks = {}
+        chunks = {}  # type: Dict[Hashable, Tuple[int, ...]]
         for v in self.variables.values():
             if v.chunks is not None:
                 for dim, c in zip(v.dims, v.chunks):
```
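The `# type:` comment on `chunks` is the pre-PEP 526 spelling of a variable annotation, presumably kept because xarray still supported Python 3.5, which lacks the inline syntax. Both forms mean the same thing to a type checker:

```python
from typing import Dict, Hashable, Tuple

# Comment-style annotation: the only option on Python 3.5.
chunks = {}  # type: Dict[Hashable, Tuple[int, ...]]

# Inline PEP 526 annotation, available from Python 3.6 onwards.
chunks_new: Dict[Hashable, Tuple[int, ...]] = {}
```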
```diff
@@ -1486,8 +1507,17 @@ def chunks(self):
                     chunks[dim] = c
         return Frozen(SortedKeysDict(chunks))
 
-    def chunk(self, chunks=None, name_prefix='xarray-', token=None,
-              lock=False):
+    def chunk(
+        self,
+        chunks: Union[
+            None,
+            Number,
+            Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]]
+        ] = None,
+        name_prefix: str = 'xarray-',
+        token: str = None,
+        lock: bool = False
+    ) -> 'Dataset':
         """Coerce all arrays in this dataset into dask arrays with the given
         chunks.
 
@@ -1500,7 +1530,7 @@ def chunk(self, chunks=None, name_prefix='xarray-', token=None,
 
         Parameters
         ----------
-        chunks : int or dict, optional
+        chunks : int or mapping, optional
             Chunk sizes along each dimension, e.g., ``5`` or
             ``{'x': 5, 'y': 5}``.
         name_prefix : str, optional
@@ -1526,7 +1556,7 @@ def chunk(self, chunks=None, name_prefix='xarray-', token=None,
             chunks = dict.fromkeys(self.dims, chunks)
 
         if chunks is not None:
-            bad_dims = [d for d in chunks if d not in self.dims]
+            bad_dims = chunks.keys() - self.dims.keys()
             if bad_dims:
                 raise ValueError('some chunks keys are not dimensions on this '
                                  'object: %s' % bad_dims)
```
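`dict.keys()` returns a set-like view, so the rewritten line computes the unknown dimension names as a single set difference rather than a list comprehension; the result is a `set`, which still works for the truthiness test and the `%s` formatting that follow. In isolation:

```python
chunks = {'x': 2, 'y': 3, 'typo': 5}
dims = {'x': 4, 'y': 6}

# Set algebra on the key views: "keys that are not dimensions".
bad_dims = chunks.keys() - dims.keys()
assert bad_dims == {'typo'}
```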
