-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Adds open_datatree and load_datatree to the tutorial module #10082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9a42379
0caf54a
ef4cbaa
e092710
37d5cc0
d15e1a9
aab8e1d
1529727
92fbd48
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2,29 +2,51 @@ | |||||
|
|
||||||
| import pytest | ||||||
|
|
||||||
| from xarray import DataArray, tutorial | ||||||
| from xarray.tests import assert_identical, network | ||||||
| from xarray import DataArray, DataTree, tutorial | ||||||
| from xarray.testing import assert_identical | ||||||
| from xarray.tests import network | ||||||
|
|
||||||
|
|
||||||
| @pytest.fixture(autouse=True, name="testfile") | ||||||
| def setUp(): | ||||||
| yield "tiny" | ||||||
|
|
||||||
|
|
||||||
| @network | ||||||
| class TestLoadDataset: | ||||||
| @pytest.fixture(autouse=True) | ||||||
| def setUp(self): | ||||||
| self.testfile = "tiny" | ||||||
|
|
||||||
| def test_download_from_github(self, tmp_path) -> None: | ||||||
| def test_download_from_github(self, testfile, tmp_path) -> None: | ||||||
| cache_dir = tmp_path / tutorial._default_cache_dir_name | ||||||
| ds = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load() | ||||||
| ds = tutorial.open_dataset(testfile, cache_dir=cache_dir).load() | ||||||
|
||||||
| ds = tutorial.open_dataset(testfile, cache_dir=cache_dir).load() | |
| ds = tutorial.open_dataset("tiny", cache_dir=cache_dir).load() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,8 +16,10 @@ | |
| import numpy as np | ||
|
|
||
| from xarray.backends.api import open_dataset as _open_dataset | ||
| from xarray.backends.api import open_datatree as _open_datatree | ||
| from xarray.core.dataarray import DataArray | ||
| from xarray.core.dataset import Dataset | ||
| from xarray.core.datatree import DataTree | ||
|
|
||
| if TYPE_CHECKING: | ||
| from xarray.backends.api import T_Engine | ||
|
|
@@ -248,3 +250,139 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset: | |
| ds.B.attrs["units"] = "Bunits" | ||
|
|
||
| return ds | ||
|
|
||
|
|
||
| def open_datatree( | ||
| name: str, | ||
| cache: bool = True, | ||
| cache_dir: None | str | os.PathLike = None, | ||
| *, | ||
| engine: T_Engine = None, | ||
| **kws, | ||
| ) -> DataTree: | ||
| """ | ||
| Open a dataset as a `DataTree` from the online repository (requires internet). | ||
|
|
||
| If a local copy is found then always use that to avoid network traffic. | ||
|
|
||
| Available datasets: | ||
| * ``imerghh_730.HDF5`` IMERGHH_07 product from 2021-08-29T07:30:00.000Z | ||
| * ``imerghh_830.HDF5`` IMERGHH_07 product from 2021-08-29T08:30:00.000Z | ||
| * ``"air_temperature"``: NCEP reanalysis subset | ||
| * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients | ||
| * ``"basin_mask"``: Dataset with ocean basins marked using integers | ||
| * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1 | ||
| * ``"rasm"``: Output of the Regional Arctic System Model (RASM) | ||
| * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output | ||
| * ``"tiny"``: small synthetic dataset with a 1D data variable | ||
| * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK | ||
| * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data | ||
| * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages | ||
|
|
||
| Parameters | ||
| ---------- | ||
| name : str | ||
| Name of the file containing the dataset. | ||
| e.g. 'air_temperature' | ||
| cache_dir : path-like, optional | ||
| The directory in which to search for and write cached data. | ||
| cache : bool, optional | ||
| If True, then cache data locally for use on subsequent calls | ||
| **kws : dict, optional | ||
| Passed to xarray.open_datatree | ||
|
|
||
| See Also | ||
| -------- | ||
| tutorial.load_datatree | ||
| open_datatree | ||
| """ | ||
| try: | ||
| import pooch | ||
| except ImportError as e: | ||
| raise ImportError( | ||
| "tutorial.open_dataset depends on pooch to download and manage datasets." | ||
| " To proceed please install pooch." | ||
| ) from e | ||
|
|
||
| logger = pooch.get_logger() | ||
| logger.setLevel("WARNING") | ||
|
|
||
| cache_dir = _construct_cache_dir(cache_dir) | ||
| if name in external_urls: | ||
| url = external_urls[name] | ||
| else: | ||
| path = pathlib.Path(name) | ||
| if not path.suffix: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, imerghh_730.HDF5 and imerghh_830.HDF5 work for both engines. I think if we wanted to add EDIT: |
||
| # process the name | ||
| default_extension = ".nc" | ||
| if engine is None: | ||
| _check_netcdf_engine_installed(name) | ||
| path = path.with_suffix(default_extension) | ||
| elif path.suffix == ".grib": | ||
| if engine is None: | ||
| engine = "cfgrib" | ||
| try: | ||
| import cfgrib # noqa: F401 | ||
| except ImportError as e: | ||
| raise ImportError( | ||
| "Reading this tutorial dataset requires the cfgrib package." | ||
| ) from e | ||
|
|
||
| url = f"{base_url}/raw/{version}/{path.name}" | ||
|
|
||
| headers = {"User-Agent": f"xarray {sys.modules['xarray'].__version__}"} | ||
| downloader = pooch.HTTPDownloader(headers=headers) | ||
|
|
||
| # retrieve the file | ||
| filepath = pooch.retrieve( | ||
| url=url, known_hash=None, path=cache_dir, downloader=downloader | ||
| ) | ||
| ds = _open_datatree(filepath, engine=engine, **kws) | ||
| if not cache: | ||
| ds = ds.load() | ||
| pathlib.Path(filepath).unlink() | ||
|
|
||
| return ds | ||
|
|
||
|
|
||
| def load_datatree(*args, **kwargs) -> DataTree: | ||
| """ | ||
| Open, load into memory (as a `DataTree`), and close a dataset from the online repository | ||
| (requires internet). | ||
|
|
||
| If a local copy is found then always use that to avoid network traffic. | ||
|
|
||
| Available datasets: | ||
| * ``imerghh_730.HDF5`` IMERGHH_07 product from 2021-08-29T07:30:00.000Z | ||
| * ``imerghh_830.HDF5`` IMERGHH_07 product from 2021-08-29T08:30:00.000Z | ||
| * ``"air_temperature"``: NCEP reanalysis subset | ||
| * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients | ||
| * ``"basin_mask"``: Dataset with ocean basins marked using integers | ||
| * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1 | ||
| * ``"rasm"``: Output of the Regional Arctic System Model (RASM) | ||
| * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output | ||
| * ``"tiny"``: small synthetic dataset with a 1D data variable | ||
| * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK | ||
| * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data | ||
| * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages | ||
|
|
||
| Parameters | ||
| ---------- | ||
| name : str | ||
| Name of the file containing the dataset. | ||
| e.g. 'air_temperature' | ||
| cache_dir : path-like, optional | ||
| The directory in which to search for and write cached data. | ||
| cache : bool, optional | ||
| If True, then cache data locally for use on subsequent calls | ||
| **kws : dict, optional | ||
| Passed to xarray.open_datatree | ||
|
|
||
| See Also | ||
| -------- | ||
| tutorial.open_datatree | ||
| open_datatree | ||
| load_datatree | ||
| """ | ||
| with open_datatree(*args, **kwargs) as ds: | ||
| return ds.load() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated this to use the
`xarray.testing` module's `assert_identical` because `xarray.tests` didn't support DataTree objects.