diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 5180b9a092f6c..f9f7f0847c650 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -45,6 +45,7 @@ Bug Fixes - Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) - Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`) +- Bug in ``pd.concat`` with dataframes heterogeneous in length and tuple ``keys`` (:issue:`14438`) - Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 9efaff6060909..fd1a23a5bab7f 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2055,14 +2055,14 @@ def _factorize_from_iterables(iterables): Returns ------- - codes_tuple : tuple of ndarrays - categories_tuple : tuple of Indexes + codes_list : list of ndarrays + categories_list : list of Indexes Notes ----- See `_factorize_from_iterable` for more info. """ if len(iterables) == 0: - # For consistency, it should return a list of 2 tuples. - return [(), ()] - return lzip(*[_factorize_from_iterable(it) for it in iterables]) + # For consistency, it should return a list of 2 lists. + return [[], []] + return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables])) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 81aa694577fb5..5b5236843643d 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -57,6 +57,24 @@ def test_concat_multiple_tzs(self): expected = DataFrame(dict(time=[ts2, ts3])) assert_frame_equal(results, expected) + def test_concat_tuple_keys(self): + # GH 14438 + df1 = pd.DataFrame(np.ones((2, 2)), columns=list('AB')) + df2 = pd.DataFrame(np.ones((3, 2)) * 2, columns=list('AB')) + results = pd.concat((df1, df2), keys=[('bee', 'bah'), ('bee', 'boo')]) + expected = pd.DataFrame( + {'A': {('bee', 'bah', 0): 1.0, + ('bee', 'bah', 1): 1.0, + ('bee', 'boo', 0): 2.0, + ('bee', 'boo', 1): 2.0, + ('bee', 'boo', 2): 2.0}, + 'B': {('bee', 'bah', 0): 1.0, + ('bee', 'bah', 1): 1.0, + ('bee', 'boo', 0): 2.0, + ('bee', 'boo', 1): 2.0, + ('bee', 'boo', 2): 2.0}}) + assert_frame_equal(results, expected) + def test_append_series_dict(self): df = DataFrame(np.random.randn(5, 4), columns=['foo', 'bar', 'baz', 'qux'])