Skip to content

Commit ba70301

Browse files
Add `data_vars="minimal"` kwarg to `open_mfdataset()` (#143)
* This new kwarg default value avoids data var dimension concatenation
* Update docstrings for `open_dataset()` and `open_mfdataset()`
1 parent 68e8c00 commit ba70301

File tree

1 file changed

+42
-19
lines changed

1 file changed

+42
-19
lines changed

xcdat/dataset.py

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pandas as pd
55
import xarray as xr
6+
from typing_extensions import Literal
67

78
from xcdat import bounds # noqa: F401
89
from xcdat.logger import setup_custom_logger
@@ -17,10 +18,15 @@ def open_dataset(
1718
1819
Operations include:
1920
20-
- If the dataset has a time dimension, decode both CF and non-CF time units.
21-
- Generate bounds for supported coordinates if they don't exist.
21+
- Decode both CF and non-CF compliant time units if the Dataset has a time
22+
dimension
23+
- Fill missing bounds for supported axes
2224
- Option to limit the Dataset to a single regular (non-bounds) data
23-
variable while retaining any bounds data variables.
25+
variable, while retaining any bounds data variables
26+
27+
``decode_times`` is statically set to ``False``. This enables a check
28+
for whether the units in the time dimension (if it exists) contain CF or
29+
non-CF compliant units, which determines if manual decoding is necessary.
2430
2531
Parameters
2632
----------
@@ -29,11 +35,8 @@ def open_dataset(
2935
data_var: Optional[str], optional
3036
The key of the data variable to keep in the Dataset, by default None.
3137
kwargs : Dict[str, Any]
32-
Additional arguments passed on to ``xarray.open_dataset``.
33-
34-
- Visit the xarray docs for accepted arguments [1]_.
35-
- ``decode_times`` defaults to ``False`` to allow for the manual
36-
decoding of non-CF time units.
38+
Additional arguments passed on to ``xarray.open_dataset``. Refer to the
39+
[1]_ xarray docs for accepted keyword arguments.
3740
3841
Returns
3942
-------
@@ -82,16 +85,29 @@ def open_dataset(
8285
def open_mfdataset(
8386
paths: Union[str, List[str]],
8487
data_var: Optional[str] = None,
88+
data_vars: Union[Literal["minimal", "different", "all"], List[str]] = "minimal",
8589
**kwargs: Dict[str, Any],
8690
) -> xr.Dataset:
8791
"""Wrapper for ``xarray.open_mfdataset()`` that applies common operations.
8892
8993
Operations include:
9094
91-
- If the dataset has a time dimension, decode both CF and non-CF time units.
92-
- Generate bounds for supported coordinates if they don't exist.
95+
- Decode both CF and non-CF compliant time units if the Dataset has a time
96+
dimension
97+
- Fill missing bounds for supported axes
9398
- Option to limit the Dataset to a single regular (non-bounds) data
94-
variable while retaining any bounds data variables.
99+
variable, while retaining any bounds data variables
100+
101+
``data_vars`` defaults to ``"minimal"``, which concatenates data variables in
102+
a manner where only data variables in which the dimension already appears
103+
are included. For example, the time dimension will not be concatenated to
104+
the dimensions of non-time data variables such as "lat_bnds" or "lon_bnds".
105+
``"minimal"`` is required for some XCDAT functions, including spatial
106+
averaging where a reduction is performed using the lat/lon bounds.
107+
108+
``decode_times`` is statically set to ``False``. This enables a check
109+
for whether the units in the time dimension (if it exists) contain CF or
110+
non-CF compliant units, which determines if manual decoding is necessary.
95111
96112
Parameters
97113
----------
@@ -103,13 +119,21 @@ def open_mfdataset(
103119
for details). (A string glob will be expanded to a 1-dimensional list.)
104120
data_var: Optional[str], optional
105121
The key of the data variable to keep in the Dataset, by default None.
122+
data_vars: Union[Literal["minimal", "different", "all"], List[str]], optional
123+
These data variables will be concatenated together:
124+
* "minimal": Only data variables in which the dimension already
125+
appears are included, default.
126+
* "different": Data variables which are not equal (ignoring
127+
attributes) across all datasets are also concatenated (as well as
128+
all for which dimension already appears). Beware: this option may
129+
load the data payload of data variables into memory if they are not
130+
already loaded.
131+
* "all": All data variables will be concatenated.
132+
* list of str: The listed data variables will be concatenated, in
133+
addition to the "minimal" data variables.
106134
kwargs : Dict[str, Any]
107-
Additional arguments passed on to ``xarray.open_mfdataset`` and/or
108-
``xarray.open_dataset``.
109-
110-
- Visit the xarray docs for accepted arguments, [2]_ and [3]_.
111-
- ``decode_times`` defaults to ``False`` to allow for the manual
112-
decoding of non-CF time units.
135+
Additional arguments passed on to ``xarray.open_mfdataset``. Refer to
136+
the [2]_ xarray docs for accepted keyword arguments.
113137
114138
Returns
115139
-------
@@ -127,7 +151,6 @@ def open_mfdataset(
127151
----------
128152
129153
.. [2] https://xarray.pydata.org/en/stable/generated/xarray.open_mfdataset.html
130-
.. [3] https://xarray.pydata.org/en/stable/generated/xarray.open_dataset.html
131154
132155
Examples
133156
--------
@@ -146,7 +169,7 @@ def open_mfdataset(
146169
>>> from xcdat.dataset import open_mfdataset
147170
>>> ds = open_mfdataset(["file_path1", "file_path2"], data_var=["ts", "tas"])
148171
"""
149-
ds = xr.open_mfdataset(paths, decode_times=False, **kwargs)
172+
ds = xr.open_mfdataset(paths, decode_times=False, data_vars=data_vars, **kwargs)
150173
ds = infer_or_keep_var(ds, data_var)
151174

152175
if ds.cf.dims.get("T") is not None:

0 commit comments

Comments
 (0)