From 26201aa0a9e4d7f48410bf8f244ead9d682d200a Mon Sep 17 00:00:00 2001 From: agraboso Date: Wed, 3 Aug 2016 21:08:59 -0400 Subject: [PATCH] BUG: allow describe() for DataFrames with only boolean columns Closes #13891 --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/core/generic.py | 7 +++--- pandas/tests/frame/test_analytics.py | 33 +++++++++++++++++++++++++++ pandas/tests/series/test_analytics.py | 21 +++++++++++++++++ 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f93e8f4240787..23839e99c5c06 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -885,6 +885,7 @@ Bug Fixes - Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) - Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) +- Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13891`) - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) - Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`) - Bug in ``MultiIndex.from_arrays`` which didn't check for input array lengths matching (:issue:`13599`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f57b94fe0a326..17cc76e703631 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5138,10 +5138,9 @@ def describe_1d(data): if self.ndim == 1: return describe_1d(self) elif (include is None) and (exclude is None): - if len(self._get_numeric_data()._info_axis) > 0: - # when some numerics are found, keep only numerics - data = self.select_dtypes(include=[np.number]) - else: + # when some numerics are found, keep only numerics + data = self.select_dtypes(include=[np.number]) + if len(data.columns) == 
0: data = self elif include == 'all': if exclude is not None: diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 370f3b5ee5b8b..390d796ced006 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -249,6 +249,39 @@ def test_bool_describe_in_mixed_frame(self): index=['count', 'unique', 'top', 'freq']) tm.assert_frame_equal(result, expected) + def test_describe_bool_frame(self): + # GH 13891 + df = pd.DataFrame({ + 'bool_data_1': [False, False, True, True], + 'bool_data_2': [False, True, True, True] + }) + result = df.describe() + expected = DataFrame({'bool_data_1': [4, 2, True, 2], + 'bool_data_2': [4, 2, True, 3]}, + index=['count', 'unique', 'top', 'freq']) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({ + 'bool_data': [False, False, True, True, False], + 'int_data': [0, 1, 2, 3, 4] + }) + result = df.describe() + expected = DataFrame({'int_data': [5, 2, df.int_data.std(), 0, 1, + 2, 3, 4]}, + index=['count', 'mean', 'std', 'min', '25%', + '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({ + 'bool_data': [False, False, True, True], + 'str_data': ['a', 'b', 'c', 'a'] + }) + result = df.describe() + expected = DataFrame({'bool_data': [4, 2, True, 2], + 'str_data': [4, 3, 'a', 2]}, + index=['count', 'unique', 'top', 'freq']) + tm.assert_frame_equal(result, expected) + def test_describe_categorical_columns(self): # GH 11558 columns = pd.CategoricalIndex(['int1', 'int2', 'obj'], diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 34cfb2f0c1529..6575c106f006f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -260,6 +260,27 @@ def test_kurt(self): self.assertEqual(0, s.kurt()) self.assertTrue((df.kurt() == 0).all()) + def test_describe(self): + s = Series([0, 1, 2, 3, 4], name='int_data') + result = s.describe() + expected = Series([5, 2, 
s.std(), 0, 1, 2, 3, 4], + name='int_data', + index=['count', 'mean', 'std', 'min', '25%', + '50%', '75%', 'max']) + self.assert_series_equal(result, expected) + + s = Series([True, True, False, False, False], name='bool_data') + result = s.describe() + expected = Series([5, 2, False, 3], name='bool_data', + index=['count', 'unique', 'top', 'freq']) + self.assert_series_equal(result, expected) + + s = Series(['a', 'a', 'b', 'c', 'd'], name='str_data') + result = s.describe() + expected = Series([5, 4, 'a', 2], name='str_data', + index=['count', 'unique', 'top', 'freq']) + self.assert_series_equal(result, expected) + def test_argsort(self): self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort()