-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Add pathlib.Path
support to open_(mf)dataset
#1514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cb55c45
f9922d6
02023ed
4276bb8
812a483
47be4b7
aac0760
3ca8c9e
aae32a8
2cc69f4
3033433
c8722db
aeed776
137dff2
b55b013
422615f
8c9ee31
f3dbf4b
efdc883
999d21d
04216f1
ce156a8
b22a389
791ba5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ dependencies: | |
- netcdf4 | ||
- numpy | ||
- pandas | ||
- pathlib2 | ||
- pynio | ||
- pytest | ||
- scipy | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ dependencies: | |
- h5netcdf | ||
- matplotlib | ||
- netcdf4 | ||
- pathlib2 | ||
- pytest | ||
- numpy | ||
- pandas | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,14 +7,15 @@ | |
from io import BytesIO | ||
from numbers import Number | ||
|
||
|
||
import numpy as np | ||
|
||
from .. import backends, conventions | ||
from .common import ArrayWriter, GLOBAL_LOCK | ||
from ..core import indexing | ||
from ..core.combine import auto_combine | ||
from ..core.utils import close_on_error, is_remote_uri | ||
from ..core.pycompat import basestring | ||
from ..core.pycompat import basestring, path_type | ||
|
||
DATAARRAY_NAME = '__xarray_dataarray_name__' | ||
DATAARRAY_VARIABLE = '__xarray_dataarray_variable__' | ||
|
@@ -139,12 +140,12 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, | |
|
||
Parameters | ||
---------- | ||
filename_or_obj : str, file or xarray.backends.*DataStore | ||
Strings are interpreted as a path to a netCDF file or an OpenDAP URL | ||
and opened with python-netCDF4, unless the filename ends with .gz, in | ||
which case the file is gunzipped and opened with scipy.io.netcdf (only | ||
netCDF3 supported). File-like objects are opened with scipy.io.netcdf | ||
(only netCDF3 supported). | ||
filename_or_obj : str, Path, file or xarray.backends.*DataStore | ||
Strings and Path objects are interpreted as a path to a netCDF file | ||
or an OpenDAP URL and opened with python-netCDF4, unless the filename | ||
ends with .gz, in which case the file is gunzipped and opened with | ||
scipy.io.netcdf (only netCDF3 supported). File-like objects are opened | ||
with scipy.io.netcdf (only netCDF3 supported). | ||
group : str, optional | ||
Path to the netCDF4 group in the given file to open (only works for | ||
netCDF4 files). | ||
|
@@ -253,6 +254,9 @@ def maybe_decode_store(store, lock=False): | |
|
||
return ds2 | ||
|
||
if isinstance(filename_or_obj, path_type): | ||
filename_or_obj = str(filename_or_obj) | ||
|
||
if isinstance(filename_or_obj, backends.AbstractDataStore): | ||
store = filename_or_obj | ||
elif isinstance(filename_or_obj, basestring): | ||
|
@@ -318,12 +322,12 @@ def open_dataarray(*args, **kwargs): | |
|
||
Parameters | ||
---------- | ||
filename_or_obj : str, file or xarray.backends.*DataStore | ||
Strings are interpreted as a path to a netCDF file or an OpenDAP URL | ||
and opened with python-netCDF4, unless the filename ends with .gz, in | ||
which case the file is gunzipped and opened with scipy.io.netcdf (only | ||
netCDF3 supported). File-like objects are opened with scipy.io.netcdf | ||
(only netCDF3 supported). | ||
filename_or_obj : str, Path, file or xarray.backends.*DataStore | ||
Strings and Paths are interpreted as a path to a netCDF file or an | ||
OpenDAP URL and opened with python-netCDF4, unless the filename ends | ||
with .gz, in which case the file is gunzipped and opened with | ||
scipy.io.netcdf (only netCDF3 supported). File-like objects are opened | ||
with scipy.io.netcdf (only netCDF3 supported). | ||
group : str, optional | ||
Path to the netCDF4 group in the given file to open (only works for | ||
netCDF4 files). | ||
|
@@ -438,7 +442,8 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, | |
---------- | ||
paths : str or sequence | ||
Either a string glob in the form "path/to/my/files/*.nc" or an explicit | ||
list of files to open. | ||
list of files to open. Paths can be given as strings or as pathlib | ||
Paths. | ||
chunks : int or dict, optional | ||
Dictionary with keys given by dimension names and values given by chunk | ||
sizes. In general, these should divide the dimensions of each dataset. | ||
|
@@ -497,6 +502,9 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, | |
""" | ||
if isinstance(paths, basestring): | ||
paths = sorted(glob(paths)) | ||
else: | ||
paths = [str(p) if isinstance(p, path_type) else p for p in paths] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You may have already discussed this with @shoyer but can you remind me why we're not sorting in the same way we do for the glob path above? I guess we're assuming all the paths are expanded already? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We sort after `glob`. Unfortunately, there isn't any way to detect a generator created by `Path.glob`. |
||
|
||
if not paths: | ||
raise IOError('no files to open') | ||
|
||
|
@@ -533,6 +541,8 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None, | |
|
||
The ``writer`` argument is only for the private use of save_mfdataset. | ||
""" | ||
if isinstance(path_or_file, path_type): | ||
path_or_file = str(path_or_file) | ||
if encoding is None: | ||
encoding = {} | ||
if path_or_file is None: | ||
|
@@ -597,12 +607,14 @@ def save_mfdataset(datasets, paths, mode='w', format=None, groups=None, | |
---------- | ||
datasets : list of xarray.Dataset | ||
List of datasets to save. | ||
paths : list of str | ||
paths : list of str or list of Paths | ||
List of paths to which to save each corresponding dataset. | ||
mode : {'w', 'a'}, optional | ||
Write ('w') or append ('a') mode. If mode='w', any existing file at | ||
these locations will be overwritten. | ||
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional | ||
format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', | ||
'NETCDF3_CLASSIC'}, optional | ||
|
||
File format for the resulting netCDF file: | ||
|
||
* NETCDF4: Data is stored in an HDF5 file, using netCDF4 API | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we typically cite the issue number (e.g.
:issue:`799`
). Would be nice to include here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just pushed a commit to add this