Skip to content

Commit bd10f9f

Browse files
benbovy and dcherian authored
Improve alignment checks (#10251)
* refactor alignment index conflict checks
  - Improved error messages (more context)
  - Simplified logic
  - Removed overly restrictive checks that caused alignment to fail when multiple indexes are set along common dimensions
* add AlignmentError exception class
* add tests
* Fix CI
* update whats new
* fix doctests (custom exception class path)

---------

Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
1 parent 97f02b4 commit bd10f9f

File tree

6 files changed

+84
-72
lines changed

6 files changed

+84
-72
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,7 @@ Exceptions
16441644
.. autosummary::
16451645
:toctree: generated/
16461646

1647+
AlignmentError
16471648
MergeError
16481649
SerializationWarning
16491650

doc/whats-new.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ New Features
3131
`Miguel Jimenez-Urias <https://github.com/Mikejmnez>`_.
3232
- Improved support pandas Extension Arrays. (:issue:`9661`, :pull:`9671`)
3333
By `Ilan Gold <https://github.com/ilan-gold>`_.
34-
34+
- Improved checks and errors raised when trying to align objects with conflicting indexes.
35+
It is now possible to align objects each with multiple indexes sharing common dimension(s).
36+
(:issue:`7695`, :pull:`10251`)
37+
By `Benoit Bovy <https://github.com/benbovy>`_.
3538

3639
Breaking changes
3740
~~~~~~~~~~~~~~~~

xarray/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
)
5151
from xarray.core.variable import IndexVariable, Variable, as_variable
5252
from xarray.namedarray.core import NamedArray
53-
from xarray.structure.alignment import align, broadcast
53+
from xarray.structure.alignment import AlignmentError, align, broadcast
5454
from xarray.structure.chunks import unify_chunks
5555
from xarray.structure.combine import combine_by_coords, combine_nested
5656
from xarray.structure.concat import concat
@@ -128,6 +128,7 @@
128128
"NamedArray",
129129
"Variable",
130130
# Exceptions
131+
"AlignmentError",
131132
"InvalidTreeError",
132133
"MergeError",
133134
"NotFoundInTreeError",

xarray/structure/alignment.py

Lines changed: 53 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
)
3636

3737

38+
class AlignmentError(ValueError):
39+
"""Error class for alignment failures due to incompatible arguments."""
40+
41+
3842
def reindex_variables(
3943
variables: Mapping[Any, Variable],
4044
dim_pos_indexers: Mapping[Any, Any],
@@ -196,7 +200,7 @@ def _normalize_indexes(
196200
for k, idx in indexes.items():
197201
if not isinstance(idx, Index):
198202
if getattr(idx, "dims", (k,)) != (k,):
199-
raise ValueError(
203+
raise AlignmentError(
200204
f"Indexer has dimensions {idx.dims} that are different "
201205
f"from that to be indexed along '{k}'"
202206
)
@@ -227,7 +231,7 @@ def _normalize_indexes(
227231
elif exclude_dims:
228232
excl_dims_str = ", ".join(str(d) for d in exclude_dims)
229233
incl_dims_str = ", ".join(str(d) for d in all_dims - exclude_dims)
230-
raise ValueError(
234+
raise AlignmentError(
231235
f"cannot exclude dimension(s) {excl_dims_str} from alignment because "
232236
"these are used by an index together with non-excluded dimensions "
233237
f"{incl_dims_str}"
@@ -268,7 +272,7 @@ def find_matching_indexes(self) -> None:
268272
for dim_sizes in all_indexes_dim_sizes.values():
269273
for dim, sizes in dim_sizes.items():
270274
if len(sizes) > 1:
271-
raise ValueError(
275+
raise AlignmentError(
272276
"cannot align objects with join='override' with matching indexes "
273277
f"along dimension {dim!r} that don't have the same size"
274278
)
@@ -283,47 +287,6 @@ def find_matching_unindexed_dims(self) -> None:
283287

284288
self.unindexed_dim_sizes = unindexed_dim_sizes
285289

286-
def assert_no_index_conflict(self) -> None:
287-
"""Check for uniqueness of both coordinate and dimension names across all sets
288-
of matching indexes.
289-
290-
We need to make sure that all indexes used for re-indexing or alignment
291-
are fully compatible and do not conflict each other.
292-
293-
Note: perhaps we could choose less restrictive constraints and instead
294-
check for conflicts among the dimension (position) indexers returned by
295-
`Index.reindex_like()` for each matching pair of object index / aligned
296-
index?
297-
(ref: https://github.com/pydata/xarray/issues/1603#issuecomment-442965602)
298-
299-
"""
300-
matching_keys = set(self.all_indexes) | set(self.indexes)
301-
302-
coord_count: dict[Hashable, int] = defaultdict(int)
303-
dim_count: dict[Hashable, int] = defaultdict(int)
304-
for coord_names_dims, _ in matching_keys:
305-
dims_set: set[Hashable] = set()
306-
for name, dims in coord_names_dims:
307-
coord_count[name] += 1
308-
dims_set.update(dims)
309-
for dim in dims_set:
310-
dim_count[dim] += 1
311-
312-
for count, msg in [(coord_count, "coordinates"), (dim_count, "dimensions")]:
313-
dup = {k: v for k, v in count.items() if v > 1}
314-
if dup:
315-
items_msg = ", ".join(
316-
f"{k!r} ({v} conflicting indexes)" for k, v in dup.items()
317-
)
318-
raise ValueError(
319-
"cannot re-index or align objects with conflicting indexes found for "
320-
f"the following {msg}: {items_msg}\n"
321-
"Conflicting indexes may occur when\n"
322-
"- they relate to different sets of coordinate and/or dimension names\n"
323-
"- they don't have the same type\n"
324-
"- they may be used to reindex data along common dimensions"
325-
)
326-
327290
def _need_reindex(self, dim, cmp_indexes) -> bool:
328291
"""Whether or not we need to reindex variables for a set of
329292
matching indexes.
@@ -383,11 +346,33 @@ def _get_index_joiner(self, index_cls) -> Callable:
383346
def align_indexes(self) -> None:
384347
"""Compute all aligned indexes and their corresponding coordinate variables."""
385348

386-
aligned_indexes = {}
387-
aligned_index_vars = {}
388-
reindex = {}
389-
new_indexes = {}
390-
new_index_vars = {}
349+
aligned_indexes: dict[MatchingIndexKey, Index] = {}
350+
aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {}
351+
reindex: dict[MatchingIndexKey, bool] = {}
352+
new_indexes: dict[Hashable, Index] = {}
353+
new_index_vars: dict[Hashable, Variable] = {}
354+
355+
def update_dicts(
356+
key: MatchingIndexKey,
357+
idx: Index,
358+
idx_vars: dict[Hashable, Variable],
359+
need_reindex: bool,
360+
):
361+
reindex[key] = need_reindex
362+
aligned_indexes[key] = idx
363+
aligned_index_vars[key] = idx_vars
364+
365+
for name, var in idx_vars.items():
366+
if name in new_indexes:
367+
other_idx = new_indexes[name]
368+
other_var = new_index_vars[name]
369+
raise AlignmentError(
370+
f"cannot align objects on coordinate {name!r} because of conflicting indexes\n"
371+
f"first index: {idx!r}\nsecond index: {other_idx!r}\n"
372+
f"first variable: {var!r}\nsecond variable: {other_var!r}\n"
373+
)
374+
new_indexes[name] = idx
375+
new_index_vars[name] = var
391376

392377
for key, matching_indexes in self.all_indexes.items():
393378
matching_index_vars = self.all_index_vars[key]
@@ -419,7 +404,7 @@ def align_indexes(self) -> None:
419404
need_reindex = False
420405
if need_reindex:
421406
if self.join == "exact":
422-
raise ValueError(
407+
raise AlignmentError(
423408
"cannot align objects with join='exact' where "
424409
"index/labels/sizes are not equal along "
425410
"these coordinates (dimensions): "
@@ -437,25 +422,14 @@ def align_indexes(self) -> None:
437422
joined_index = matching_indexes[0]
438423
joined_index_vars = matching_index_vars[0]
439424

440-
reindex[key] = need_reindex
441-
aligned_indexes[key] = joined_index
442-
aligned_index_vars[key] = joined_index_vars
443-
444-
for name, var in joined_index_vars.items():
445-
new_indexes[name] = joined_index
446-
new_index_vars[name] = var
425+
update_dicts(key, joined_index, joined_index_vars, need_reindex)
447426

448427
# Explicitly provided indexes that are not found in objects to align
449428
# may relate to unindexed dimensions so we add them too
450429
for key, idx in self.indexes.items():
451430
if key not in aligned_indexes:
452431
index_vars = self.index_vars[key]
453-
reindex[key] = False
454-
aligned_indexes[key] = idx
455-
aligned_index_vars[key] = index_vars
456-
for name, var in index_vars.items():
457-
new_indexes[name] = idx
458-
new_index_vars[name] = var
432+
update_dicts(key, idx, index_vars, False)
459433

460434
self.aligned_indexes = aligned_indexes
461435
self.aligned_index_vars = aligned_index_vars
@@ -474,7 +448,7 @@ def assert_unindexed_dim_sizes_equal(self) -> None:
474448
else:
475449
add_err_msg = ""
476450
if len(sizes) > 1:
477-
raise ValueError(
451+
raise AlignmentError(
478452
f"cannot reindex or align along dimension {dim!r} "
479453
f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg
480454
)
@@ -502,14 +476,25 @@ def _get_dim_pos_indexers(
502476
self,
503477
matching_indexes: dict[MatchingIndexKey, Index],
504478
) -> dict[Hashable, Any]:
505-
dim_pos_indexers = {}
479+
dim_pos_indexers: dict[Hashable, Any] = {}
480+
dim_index: dict[Hashable, Index] = {}
506481

507482
for key, aligned_idx in self.aligned_indexes.items():
508483
obj_idx = matching_indexes.get(key)
509484
if obj_idx is not None:
510485
if self.reindex[key]:
511486
indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs)
512-
dim_pos_indexers.update(indexers)
487+
for dim, idxer in indexers.items():
488+
if dim in dim_pos_indexers and not np.array_equal(
489+
idxer, dim_pos_indexers[dim]
490+
):
491+
raise AlignmentError(
492+
f"cannot reindex or align along dimension {dim!r} because "
493+
"of conflicting re-indexers returned by multiple indexes\n"
494+
f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n"
495+
)
496+
dim_pos_indexers[dim] = idxer
497+
dim_index[dim] = obj_idx
513498

514499
return dim_pos_indexers
515500

@@ -571,7 +556,6 @@ def align(self) -> None:
571556

572557
self.find_matching_indexes()
573558
self.find_matching_unindexed_dims()
574-
self.assert_no_index_conflict()
575559
self.align_indexes()
576560
self.assert_unindexed_dim_sizes_equal()
577561

@@ -735,7 +719,7 @@ def align(
735719
736720
Raises
737721
------
738-
ValueError
722+
AlignmentError
739723
If any dimensions without labels on the arguments have different sizes,
740724
or a different size than the size of the aligned dimension labels.
741725
@@ -853,7 +837,7 @@ def align(
853837
>>> a, b = xr.align(x, y, join="exact")
854838
Traceback (most recent call last):
855839
...
856-
ValueError: cannot align objects with join='exact' ...
840+
xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' ...
857841
858842
>>> a, b = xr.align(x, y, join="override")
859843
>>> a

xarray/structure/merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ def merge(
942942
>>> xr.merge([x, y, z], join="exact")
943943
Traceback (most recent call last):
944944
...
945-
ValueError: cannot align objects with join='exact' where ...
945+
xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' where ...
946946
947947
Raises
948948
------

xarray/tests/test_dataset.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
import xarray as xr
2525
from xarray import (
26+
AlignmentError,
2627
DataArray,
2728
Dataset,
2829
IndexVariable,
@@ -2543,6 +2544,28 @@ def test_align_indexes(self) -> None:
25432544

25442545
assert_identical(expected_x2, x2)
25452546

2547+
def test_align_multiple_indexes_common_dim(self) -> None:
2548+
a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb")
2549+
b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb")
2550+
2551+
(a2, b2) = align(a, b, join="inner")
2552+
assert_identical(a2, b, check_default_indexes=False)
2553+
assert_identical(b2, b, check_default_indexes=False)
2554+
2555+
c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb")
2556+
2557+
with pytest.raises(AlignmentError, match=".*conflicting re-indexers"):
2558+
align(a, c)
2559+
2560+
def test_align_conflicting_indexes(self) -> None:
2561+
class CustomIndex(PandasIndex): ...
2562+
2563+
a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb")
2564+
b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex)
2565+
2566+
with pytest.raises(AlignmentError, match="cannot align.*conflicting indexes"):
2567+
align(a, b)
2568+
25462569
def test_align_non_unique(self) -> None:
25472570
x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]})
25482571
x1, x2 = align(x, x)

0 commit comments

Comments
 (0)