Skip to content

Commit a92d411

Browse files
committed
BUG: fix outer join with MultiIndex, is_monotonic bug fix, GH #351
1 parent f458b65 commit a92d411

File tree

3 files changed

+54 / -5 lines changed (54 additions, 5 deletions)

3 files changed

+54 / -5 lines changed (54 additions, 5 deletions)

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,8 @@ pandas 0.5.1
9191
- Fix failure passing Int64Index to Index.union when both are monotonic
9292
- Fix error when passing SparseSeries to (dense) DataFrame constructor
9393
- Added missing bang at top of setup.py (GH #352)
94+
- Change `is_monotonic` on MultiIndex so it properly compares the tuples
95+
- Fix MultiIndex outer join logic (GH #351)
9496

9597
Thanks
9698
------

pandas/core/index.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def values(self):
9191

9292
@cache_readonly
9393
def is_monotonic(self):
94-
return self._is_monotonic(self)
94+
return self._is_monotonic(self.values)
9595

9696
_indexMap = None
9797
_integrity = False
@@ -517,6 +517,9 @@ def join(self, other, how='left', return_indexers=False):
517517
return join_index
518518

519519
def _join_monotonic(self, other, how='left', return_indexers=False):
520+
this_vals = self.values
521+
other_vals = other.values
522+
520523
if how == 'left':
521524
join_index = self
522525
lidx = None
@@ -526,11 +529,13 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
526529
lidx = lib.left_join_indexer_object(other, self)
527530
ridx = None
528531
elif how == 'inner':
529-
join_index, lidx, ridx = lib.inner_join_indexer_object(self, other)
530-
join_index = Index(join_index)
532+
join_index, lidx, ridx = lib.inner_join_indexer_object(this_vals,
533+
other_vals)
534+
join_index = self._wrap_joined_index(join_index, other)
531535
elif how == 'outer':
532-
join_index, lidx, ridx = lib.outer_join_indexer_object(self, other)
533-
join_index = Index(join_index)
536+
join_index, lidx, ridx = lib.outer_join_indexer_object(this_vals,
537+
other_vals)
538+
join_index = self._wrap_joined_index(join_index, other)
534539
else: # pragma: no cover
535540
raise Exception('do not recognize join method %s' % how)
536541

@@ -539,6 +544,10 @@ def _join_monotonic(self, other, how='left', return_indexers=False):
539544
else:
540545
return join_index
541546

547+
def _wrap_joined_index(self, joined, other):
548+
name = self.name if self.name == other.name else None
549+
return Index(joined, name=name)
550+
542551
def slice_locs(self, start=None, end=None):
543552
"""
544553
For an ordered Index, compute the slice locations for input labels
@@ -1641,6 +1650,11 @@ def _bounds(self):
16411650

16421651
return self.__bounds
16431652

1653+
1654+
def _wrap_joined_index(self, joined, other):
1655+
names = self.names if self.names == other.names else None
1656+
return MultiIndex.from_tuples(joined, names=names)
1657+
16441658
# For utility purposes
16451659

16461660
NULL_INDEX = Index([])

pandas/tests/test_frame.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3514,6 +3514,39 @@ def test_join_unconsolidated(self):
35143514

35153515
a.join(d)
35163516

3517+
def test_join_multiindex(self):
3518+
index1 = MultiIndex.from_arrays([['a','a','a','b','b','b'],
3519+
[1,2,3,1,2,3]],
3520+
names=['first', 'second'])
3521+
3522+
index2 = MultiIndex.from_arrays([['b','b','b','c','c','c'],
3523+
[1,2,3,1,2,3]],
3524+
names=['first', 'second'])
3525+
3526+
df1 = DataFrame(data=np.random.randn(6), index=index1,
3527+
columns=['var X'])
3528+
df2 = DataFrame(data=np.random.randn(6), index=index2,
3529+
columns=['var Y'])
3530+
3531+
df1 = df1.sortlevel(0)
3532+
df2 = df2.sortlevel(0)
3533+
3534+
joined = df1.join(df2, how='outer')
3535+
ex_index = index1.get_tuple_index() + index2.get_tuple_index()
3536+
expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
3537+
assert_frame_equal(joined, expected)
3538+
self.assertEqual(joined.index.names, index1.names)
3539+
3540+
df1 = df1.sortlevel(1)
3541+
df2 = df2.sortlevel(1)
3542+
3543+
joined = df1.join(df2, how='outer').sortlevel(0)
3544+
ex_index = index1.get_tuple_index() + index2.get_tuple_index()
3545+
expected = df1.reindex(ex_index).join(df2.reindex(ex_index))
3546+
assert_frame_equal(joined, expected)
3547+
self.assertEqual(joined.index.names, index1.names)
3548+
3549+
35173550
def _join_by_hand(a, b, how='left'):
35183551
join_index = a.index.join(b.index, how=how)
35193552

0 commit comments

Comments
 (0)