Skip to content
This repository was archived by the owner on Dec 22, 2019. It is now read-only.

Implement sort=None behavior #8

Merged
merged 4 commits into from
Oct 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6423,7 +6423,44 @@ def _append_list_of_frames(self, other, *args, **kwargs):
from pandas.core.indexes.api import _normalize_dataframes
from pandas.core.reshape.concat import concat

# TODO: sorting behavior when sort=None
# sorting behavior when sort=None
# TODO: remove when kwarg value changes
if sort is None:
# establish desired behavior
if _obj_type in (dict, Series):
# dict/ser

sort = False
warn = False
elif _item_type in (dict, Series):
# [dict]/[ser]

if (self.columns.get_indexer(other[0].columns) >= 0).all():
# self.columns >= other[0].columns
sort = False
warn = False
else:
sort = True
types = [df.columns.dtype for df in [self] + other]
common = find_common_type(types)
warn = (common == object)
else:
# frame/[frame]

if all(self.columns.equals(df.columns) for df in other):
# all values the same
sort = False
warn = False
else:
sort = True
types = [df.columns.dtype for df in [self] + other]
common = find_common_type(types)
warn = (common == object)

# warn if necessary
if warn:
from pandas.core.indexes.api import _sort_msg
warnings.warn(_sort_msg, FutureWarning)

# The behavior of concat is a bit problematic as it is. To get around,
# we prepare the DataFrames before feeding them into concat.
Expand Down
143 changes: 143 additions & 0 deletions pandas/tests/reshape/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series
from pandas.core.indexes.base import InvalidIndexError
from pandas.util.testing import assert_frame_equal

Expand Down Expand Up @@ -328,6 +329,148 @@ def test_no_unecessary_upcast(self, sort):
assert_frame_equal(result, expected)


class TestAppendSortNone(object):
"""Regression tests to preserve the behavior of sort=None
"""

def generate_frames(self, compare, special):
if compare == 'lt':
if special:
df1 = DataFrame([[11, 12]], columns=[2, 1])
df2 = DataFrame([[13, 14, 15]], columns=[3, 2, 1])
else:
df1 = DataFrame([[11, 12]], columns=list('ba'))
df2 = DataFrame([[13, 14, 15]], columns=list('cba'))
elif compare == 'eq':
if special:
df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1])
df2 = DataFrame([[14, 15, 16]], columns=[3, 2, 1])
else:
df1 = DataFrame([[11, 12, 13]], columns=list('cba'))
df2 = DataFrame([[14, 15, 16]], columns=list('cba'))
elif compare == 'gt':
if special:
df1 = DataFrame([[11, 12, 13]], columns=[3, 2, 1])
df2 = DataFrame([[14, 15]], columns=[2, 1])
else:
df1 = DataFrame([[11, 12, 13]], columns=list('cba'))
df2 = DataFrame([[14, 15]], columns=list('ba'))
elif compare == 'dups':
# special category for duplicates
# assumes compare = 'eq'
if special:
df1 = DataFrame([[11, 12, 13]], columns=[3, 3, 1])
df2 = DataFrame([[14, 15, 16]], columns=[3, 3, 1])
else:
df1 = DataFrame([[11, 12, 13]], columns=list('cca'))
df2 = DataFrame([[14, 15, 16]], columns=list('cca'))

# avoid upcasting problems
df1 = df1.astype('float64')
df2 = df2.astype('float64')

return df1, df2

def merge_indexes(self, idx1, idx2, sort):
len1 = idx1.size
len2 = idx2.size

if len1 < len2:
# match 'lt' in self.generate_frames
vals1 = idx1.tolist()
vals2 = [idx2.tolist()[0]]
result = Index(vals1 + vals2)
else:
result = idx1.copy()

return result.sort_values() if sort else result

def merge_frames(self, df1, df2, sort):
new_index = self.merge_indexes(df1.columns, df2.columns, sort)
df1 = df1.reindex(new_index, axis=1)
df2 = df2.reindex(new_index, axis=1)

values = np.vstack([df1.values[0, :], df2.values[0, :]])
result = DataFrame(values, columns=new_index)
return result

@pytest.mark.parametrize('input_type', ['series', 'dict'])
@pytest.mark.parametrize('special', [True, False])
@pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups'])
def test_append_series_dict(self, compare, special, input_type):
# When appending a Series or dict, the resulting columns come unsorted
# and no warning is raised.

sorts = False
warns = False

df1, df2 = self.generate_frames(compare, special)
if input_type == 'series':
other = df2.loc[0]
else:
other = df2.loc[0].to_dict()
if compare == 'dups':
return

ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None)
expected = self.merge_frames(df1, df2, sorts)
with ctx:
result = df1.append(other, ignore_index=True, sort=None)
assert_frame_equal(result, expected)

@pytest.mark.parametrize('input_type', ['[series]', '[dict]'])
@pytest.mark.parametrize('special', [True, False])
@pytest.mark.parametrize('compare', ['lt', 'eq', 'gt']) # dups won't work
def test_append_list_of_series_dict(self, compare, special, input_type):
# When appending a list of Series or list of dicts, the behavior is
# as specified below.

if compare in ('gt', 'eq'):
sorts = False
warns = False
else:
sorts = True
warns = not special

df1, df2 = self.generate_frames(compare, special)
if input_type == '[series]':
other = [df2.loc[0]]
else:
other = [df2.loc[0].to_dict()]

ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None)
expected = self.merge_frames(df1, df2, sorts)
with ctx:
result = df1.append(other, ignore_index=True, sort=None)
assert_frame_equal(result, expected)

@pytest.mark.parametrize('input_type', ['dataframe', '[dataframe]'])
@pytest.mark.parametrize('special', [True, False])
@pytest.mark.parametrize('compare', ['lt', 'eq', 'gt', 'dups'])
def test_append_dframe_list_of_dframe(self, compare, special, input_type):
# When appenindg a DataFrame of list of DataFrames, the behavior is as
# specified below.

if compare in ('dups', 'eq'):
sorts = False
warns = False
else:
sorts = True
warns = not special

df1, df2 = self.generate_frames(compare, special)
if input_type == 'dataframe':
other = df2
else:
other = [df2]

ctx = pytest.warns(FutureWarning) if warns else pytest.warns(None)
expected = self.merge_frames(df1, df2, sorts)
with ctx:
result = df1.append(other, ignore_index=True, sort=None)
assert_frame_equal(result, expected)


class TestAppendColumnsIndex(object):
@pytest.mark.parametrize('idx_name3', [None, 'foo', 'bar', 'baz'])
@pytest.mark.parametrize('idx_name2', [None, 'foo', 'bar', 'baz'])
Expand Down