Skip to content

Commit e678a1d

Browse files
benbovy and keewis authored
Add set_xindex and drop_indexes methods (#6971)
* temporary API to set custom indexes
* add the temporary index API to DataArray
* add options argument to Index.from_variables()

  It allows passing options to the constructor of a custom index class (if any). The **options arguments of Dataset.set_xindex() are passed through.

  Also add type annotations to set_xindex().
* fix mypy
* remove temporary API warning
* add the Index class in Xarray's root namespace
* improve set_xindex docstrings and add to api.rst
* remove temp comments
* special case for pandas multi-index dim coord
* add tests for set_xindex
* error message tweaks
* set_xindex with 1 coord: avoid reordering coords
* mypy fixes
* add Dataset and DataArray drop_indexes methods
* improve assert_no_index_corrupted error msg
* drop_indexes: add tests
* add drop_indexes to api.rst
* improve docstrings of legacy methods
* add what's new entry
* try using correct typing w/o mypy complaining
* make index_cls arg optional

  Try setting a pandas (multi-)index by default.
* docstrings fixes and tweaks
* make Index.from_variables options arg keyword only
* improve set_xindex invalid coordinates error msg
* add xarray.indexes namespace
* type tweaks

Co-authored-by: Keewis <[email protected]>
1 parent 2f0f95a commit e678a1d

File tree

9 files changed

+415
-18
lines changed

9 files changed

+415
-18
lines changed

doc/api.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ Dataset contents
107107
Dataset.swap_dims
108108
Dataset.expand_dims
109109
Dataset.drop_vars
110+
Dataset.drop_indexes
110111
Dataset.drop_duplicates
111112
Dataset.drop_dims
112113
Dataset.set_coords
@@ -146,6 +147,7 @@ Indexing
146147
Dataset.reindex_like
147148
Dataset.set_index
148149
Dataset.reset_index
150+
Dataset.set_xindex
149151
Dataset.reorder_levels
150152
Dataset.query
151153

@@ -298,6 +300,7 @@ DataArray contents
298300
DataArray.swap_dims
299301
DataArray.expand_dims
300302
DataArray.drop_vars
303+
DataArray.drop_indexes
301304
DataArray.drop_duplicates
302305
DataArray.reset_coords
303306
DataArray.copy
@@ -330,6 +333,7 @@ Indexing
330333
DataArray.reindex_like
331334
DataArray.set_index
332335
DataArray.reset_index
336+
DataArray.set_xindex
333337
DataArray.reorder_levels
334338
DataArray.query
335339

@@ -1080,13 +1084,19 @@ Advanced API
10801084
Variable
10811085
IndexVariable
10821086
as_variable
1087+
indexes.Index
10831088
Context
10841089
register_dataset_accessor
10851090
register_dataarray_accessor
10861091
Dataset.set_close
10871092
backends.BackendArray
10881093
backends.BackendEntrypoint
10891094

1095+
Default, pandas-backed indexes built into Xarray:
1096+
1097+
indexes.PandasIndex
1098+
indexes.PandasMultiIndex
1099+
10901100
These backends provide a low-level interface for lazily loading data from
10911101
external file-formats or protocols, and can be manually invoked to create
10921102
arguments for the ``load_store`` and ``dump_to_store`` Dataset methods:

doc/whats-new.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ v2022.07.0 (unreleased)
2121

2222
New Features
2323
~~~~~~~~~~~~
24+
25+
- Add :py:meth:`Dataset.set_xindex` and :py:meth:`Dataset.drop_indexes` and
26+
their DataArray counterparts for setting and dropping pandas or custom indexes
27+
given a set of arbitrary coordinates. (:pull:`6971`)
28+
By `Benoît Bovy <https://github.com/benbovy>`_ and `Justus Magin <https://github.com/keewis>`_.
2429
- Enable taking the mean of dask-backed :py:class:`cftime.datetime` arrays
2530
(:pull:`6556`, :pull:`6940`). By `Deepak Cherian
2631
<https://github.com/dcherian>`_ and `Spencer Clark

xarray/core/dataarray.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,11 @@ def set_index(
23492349
"""Set DataArray (multi-)indexes using one or more existing
23502350
coordinates.
23512351
2352+
This legacy method is limited to pandas (multi-)indexes and
2353+
1-dimensional "dimension" coordinates. See
2354+
:py:meth:`~DataArray.set_xindex` for setting a pandas or a custom
2355+
Xarray-compatible index from one or more arbitrary coordinates.
2356+
23522357
Parameters
23532358
----------
23542359
indexes : {dim: index, ...}
@@ -2393,6 +2398,7 @@ def set_index(
23932398
See Also
23942399
--------
23952400
DataArray.reset_index
2401+
DataArray.set_xindex
23962402
"""
23972403
ds = self._to_temp_dataset().set_index(indexes, append=append, **indexes_kwargs)
23982404
return self._from_temp_dataset(ds)
@@ -2406,6 +2412,12 @@ def reset_index(
24062412
) -> DataArray:
24072413
"""Reset the specified index(es) or multi-index level(s).
24082414
2415+
This legacy method is specific to pandas (multi-)indexes and
2416+
1-dimensional "dimension" coordinates. See the more generic
2417+
:py:meth:`~DataArray.drop_indexes` and :py:meth:`~DataArray.set_xindex`
2418+
methods to respectively drop and set pandas or custom indexes for
2419+
arbitrary coordinates.
2420+
24092421
Parameters
24102422
----------
24112423
dims_or_levels : Hashable or sequence of Hashable
@@ -2424,10 +2436,41 @@ def reset_index(
24242436
See Also
24252437
--------
24262438
DataArray.set_index
2439+
DataArray.set_xindex
2440+
DataArray.drop_indexes
24272441
"""
24282442
ds = self._to_temp_dataset().reset_index(dims_or_levels, drop=drop)
24292443
return self._from_temp_dataset(ds)
24302444

2445+
def set_xindex(
2446+
self: T_DataArray,
2447+
coord_names: str | Sequence[Hashable],
2448+
index_cls: type[Index] | None = None,
2449+
**options,
2450+
) -> T_DataArray:
2451+
"""Set a new, Xarray-compatible index from one or more existing
2452+
coordinate(s).
2453+
2454+
Parameters
2455+
----------
2456+
coord_names : str or list
2457+
Name(s) of the coordinate(s) used to build the index.
2458+
If several names are given, their order matters.
2459+
index_cls : subclass of :class:`~xarray.indexes.Index`, optional
2460+
The type of index to create. By default, try setting
2461+
a pandas (multi-)index from the supplied coordinates.
2462+
**options
2463+
Options passed to the index constructor.
2464+
2465+
Returns
2466+
-------
2467+
obj : DataArray
2468+
Another dataarray, with this dataarray's data and with a new index.
2469+
2470+
"""
2471+
ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options)
2472+
return self._from_temp_dataset(ds)
2473+
24312474
def reorder_levels(
24322475
self: T_DataArray,
24332476
dim_order: Mapping[Any, Sequence[int | Hashable]] | None = None,
@@ -2738,6 +2781,31 @@ def drop_vars(
27382781
ds = self._to_temp_dataset().drop_vars(names, errors=errors)
27392782
return self._from_temp_dataset(ds)
27402783

2784+
def drop_indexes(
2785+
self: T_DataArray,
2786+
coord_names: Hashable | Iterable[Hashable],
2787+
*,
2788+
errors: ErrorOptions = "raise",
2789+
) -> T_DataArray:
2790+
"""Drop the indexes assigned to the given coordinates.
2791+
2792+
Parameters
2793+
----------
2794+
coord_names : hashable or iterable of hashable
2795+
Name(s) of the coordinate(s) for which to drop the index.
2796+
errors : {"raise", "ignore"}, default: "raise"
2797+
If 'raise', raises a ValueError if any of the coordinates
2798+
passed have no index or are not in the dataset.
2799+
If 'ignore', no error is raised.
2800+
2801+
Returns
2802+
-------
2803+
dropped : DataArray
2804+
A new dataarray with dropped indexes.
2805+
"""
2806+
ds = self._to_temp_dataset().drop_indexes(coord_names, errors=errors)
2807+
return self._from_temp_dataset(ds)
2808+
27412809
def drop(
27422810
self: T_DataArray,
27432811
labels: Mapping[Any, Any] | None = None,

0 commit comments

Comments
 (0)