From 2e833fa758647c66c99b84b19c0498945cfe8ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Fri, 5 Aug 2016 14:21:33 +0200 Subject: [PATCH 01/15] BUG: multi-type sparse slicing fixes and improvements Types were incorrectly determined when slicing SparseDataFrames with multiple dtypes (such as float and object). Also enables type inference for SparseArrays by default. --- pandas/core/internals.py | 5 ++++- pandas/tests/frame/test_indexing.py | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 18b67c41b4554..02d463c819b1f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4439,7 +4439,10 @@ def _lcd_dtype(l): """ find the lowest dtype that can accomodate the given types """ m = l[0].dtype for x in l[1:]: - if x.dtype.itemsize > m.itemsize: + # the new dtype must either be wider or a strict subtype + if (x.dtype.itemsize > m.itemsize or + (np.issubdtype(m, x.dtype) and + not np.issubdtype(x.dtype, m))): m = x.dtype return m diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 578df5ba9101e..b3281d91bc77e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2712,6 +2712,14 @@ def test_type_error_multiindex(self): result = dg['x', 0] assert_series_equal(result, expected) + def test_sparse_indexing_single_multitype(self): + from pandas import SparseDataFrame, SparseSeries + sdf = SparseDataFrame([[1, 2, 'a'], [4, 5, 'b']]) + tm.assert_sp_series_equal(sdf.iloc[0], + SparseSeries([1, 2, 'a'], name=0)) + tm.assert_sp_series_equal(sdf.iloc[1], + SparseSeries([4, 5, 'b'], name=1)) + class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): From fb6237c91a4d8bf9e7c5b5afe59e2ae38998a3ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Fri, 5 Aug 2016 14:35:29 +0200 Subject: [PATCH 02/15] Add a whatsnew note. 
--- doc/source/whatsnew/v0.19.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index a041e175d5f1a..1d614951e12a7 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -764,6 +764,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` - Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) - Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) - Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) +- Bug when slicing single rows of multi-type ``SparseDataFrames`` (:issue:`13917`) .. _whatsnew_0190.deprecations: From c7fb0f252fe256d043534a61700afd25a2097d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Mon, 8 Aug 2016 21:30:24 +0200 Subject: [PATCH 03/15] Use numpy to determine common dtypes. 
--- pandas/core/internals.py | 18 +++--------------- pandas/tests/frame/test_block_internals.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 02d463c819b1f..8897afad58de9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4435,17 +4435,6 @@ def _interleaved_dtype(blocks): for x in blocks: counts[type(x)].append(x) - def _lcd_dtype(l): - """ find the lowest dtype that can accomodate the given types """ - m = l[0].dtype - for x in l[1:]: - # the new dtype must either be wider or a strict subtype - if (x.dtype.itemsize > m.itemsize or - (np.issubdtype(m, x.dtype) and - not np.issubdtype(x.dtype, m))): - m = x.dtype - return m - have_int = len(counts[IntBlock]) > 0 have_bool = len(counts[BoolBlock]) > 0 have_object = len(counts[ObjectBlock]) > 0 @@ -4458,7 +4447,6 @@ def _lcd_dtype(l): # TODO: have_sparse is not used have_sparse = len(counts[SparseBlock]) > 0 # noqa have_numeric = have_float or have_complex or have_int - has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat if (have_object or @@ -4470,10 +4458,9 @@ def _lcd_dtype(l): elif have_bool: return np.dtype(bool) elif have_int and not have_float and not have_complex: - # if we are mixing unsigned and signed, then return # the next biggest int type (if we can) - lcd = _lcd_dtype(counts[IntBlock]) + lcd = np.find_common_type([b.dtype for b in counts[IntBlock]], []) kinds = set([i.dtype.kind for i in counts[IntBlock]]) if len(kinds) == 1: return lcd @@ -4489,7 +4476,8 @@ def _lcd_dtype(l): elif have_complex: return np.dtype('c16') else: - return _lcd_dtype(counts[FloatBlock] + counts[SparseBlock]) + introspection_blks = counts[FloatBlock] + counts[SparseBlock] + return np.find_common_type([b.dtype for b in introspection_blks], []) def _consolidate(blocks): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 
38163d89355e9..8d41e197b401f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -104,15 +104,20 @@ def test_as_matrix_lcd(self): values = self.mixed_float.as_matrix(['C']) self.assertEqual(values.dtype, np.float16) + # B uint64 forces float because there are other signed int types values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D']) - self.assertEqual(values.dtype, np.int64) + self.assertEqual(values.dtype, np.float64) values = self.mixed_int.as_matrix(['A', 'D']) self.assertEqual(values.dtype, np.int64) - # guess all ints are cast to uints.... + # B uint64 forces float because there are other signed int types values = self.mixed_int.as_matrix(['A', 'B', 'C']) - self.assertEqual(values.dtype, np.int64) + self.assertEqual(values.dtype, np.float64) + + # as B and C are both unsigned, no forcing to float is needed + values = self.mixed_int.as_matrix(['B', 'C']) + self.assertEqual(values.dtype, np.uint64) values = self.mixed_int.as_matrix(['A', 'C']) self.assertEqual(values.dtype, np.int32) From 114217ee3a3e0f8bcbed1073d8eabfda840067f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Mon, 8 Aug 2016 21:30:56 +0200 Subject: [PATCH 04/15] Infer dtype instead of forcing float in SparseArray. 
--- pandas/sparse/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index e22a62ee7f917..39617821e97b9 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -162,7 +162,7 @@ class SparseArray(PandasObject, np.ndarray): fill_value = None def __new__(cls, data, sparse_index=None, index=None, kind='integer', - fill_value=None, dtype=np.float64, copy=False): + fill_value=None, dtype=None, copy=False): if index is not None: if data is None: From 33973a53af8cf6c611cfc14866e429bfc086d1a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Mon, 8 Aug 2016 21:31:06 +0200 Subject: [PATCH 05/15] Additional multitype tests. --- pandas/tests/frame/test_indexing.py | 65 +++++++++++++++++++++++++--- pandas/tests/series/test_indexing.py | 26 +++++++++++ 2 files changed, 84 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index b3281d91bc77e..e586f68bc8daa 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2712,13 +2712,64 @@ def test_type_error_multiindex(self): result = dg['x', 0] assert_series_equal(result, expected) - def test_sparse_indexing_single_multitype(self): - from pandas import SparseDataFrame, SparseSeries - sdf = SparseDataFrame([[1, 2, 'a'], [4, 5, 'b']]) - tm.assert_sp_series_equal(sdf.iloc[0], - SparseSeries([1, 2, 'a'], name=0)) - tm.assert_sp_series_equal(sdf.iloc[1], - SparseSeries([4, 5, 'b'], name=1)) + +class TestSparseDataFrameMultitype(tm.TestCase): + def setUp(self): + super(TestSparseDataFrameMultitype, self).setUp() + self.string_series = pd.SparseSeries(['a', 'b', 'c']) + self.int_series = pd.SparseSeries([1, 2, 3]) + self.float_series = pd.SparseSeries([1.1, 1.2, 1.3]) + self.object_series = pd.SparseSeries([[], {}, set()]) + self.sdf = pd.SparseDataFrame({ + 'string': self.string_series, + 'int': self.int_series, + 'float': 
self.float_series, + 'object': self.object_series, + }) + self.cols = ['string', 'int', 'float', 'object'] + self.sdf = self.sdf[self.cols] + + def test_basic_dtypes(self): + for _, row in self.sdf.iterrows(): + self.assertEqual(row.dtype, object) + tm.assert_sp_series_equal(self.sdf['string'], self.string_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['int'], self.int_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['float'], self.float_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['object'], self.object_series, + check_names=False) + + def test_indexing_single(self): + tm.assert_sp_series_equal(self.sdf.iloc[0], + pd.SparseSeries(['a', 1, 1.1, []], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[1], + pd.SparseSeries(['b', 2, 1.2, {}], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[2], + pd.SparseSeries(['c', 3, 1.3, set()], + index=self.cols), + check_names=False) + + def test_indexing_multiple(self): + tm.assert_sp_frame_equal(self.sdf, self.sdf[:]) + tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) + tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], + pd.SparseDataFrame({ + 'string': ['b', 'c'], + 'int': [2, 3], + 'float': [1.2, 1.3], + 'object': [{}, set()] + }, index=[1, 2])[self.cols]) + tm.assert_sp_frame_equal(self.sdf[['int', 'string']], + pd.SparseDataFrame({ + 'int': self.int_series, + 'string': self.string_series, + })) class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 64ebaa63cc10f..35235dbcaa246 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1856,3 +1856,29 @@ def test_multilevel_preserve_name(self): result2 = s.ix['foo'] self.assertEqual(result.name, s.name) self.assertEqual(result2.name, s.name) + + +class TestSparseSeriesMultitype(tm.TestCase): + def setUp(self): 
+ super(TestSparseSeriesMultitype, self).setUp() + self.index = ['string', 'int', 'float', 'object'] + self.ss = pd.SparseSeries(['a', 1, 1.1, []], + index=self.index) + + def test_indexing_single(self): + for i, idx in enumerate(self.index): + self.assertEqual(self.ss.iloc[i], self.ss[idx]) + self.assertEqual(type(self.ss.iloc[i]), + type(self.ss[idx])) + self.assertEqual(self.ss['string'], 'a') + self.assertEqual(self.ss['int'], 1) + self.assertEqual(self.ss['float'], 1.1) + self.assertEqual(self.ss['object'], []) + + def test_indexing_multiple(self): + tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], + pd.SparseSeries(['a', 1], + index=['string', 'int'])) + tm.assert_sp_series_equal(self.ss.loc[['string', 'object']], + pd.SparseSeries(['a', []], + index=['string', 'object'])) From 93d2de6ed07c44e046074405cac0bb0fbb70a471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Mon, 8 Aug 2016 21:37:54 +0200 Subject: [PATCH 06/15] Modified the whatsnew message. --- doc/source/whatsnew/v0.19.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 1d614951e12a7..9c3088f4e823b 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -764,7 +764,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` - Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) - Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) - Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) -- Bug when slicing single rows of multi-type ``SparseDataFrames`` (:issue:`13917`) +- Bug when interacting with multi-type SparseDataFrames: single row slicing now works because types are not forced to float (:issue:`13917`) .. 
_whatsnew_0190.deprecations: From 6782bc7280f09bed402dfbed5ea8b74e9c7b4211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 12:08:34 +0200 Subject: [PATCH 07/15] Revert default argument change. --- pandas/sparse/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 39617821e97b9..e22a62ee7f917 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -162,7 +162,7 @@ class SparseArray(PandasObject, np.ndarray): fill_value = None def __new__(cls, data, sparse_index=None, index=None, kind='integer', - fill_value=None, dtype=None, copy=False): + fill_value=None, dtype=np.float64, copy=False): if index is not None: if data is None: From 21049480529921ff5aa8754bc9fbae125305fe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 12:09:52 +0200 Subject: [PATCH 08/15] Factor the common type discovery to an internal function. --- pandas/core/internals.py | 7 ++++--- pandas/core/ops.py | 4 ++-- pandas/tests/frame/test_indexing.py | 8 ++++---- pandas/types/cast.py | 6 ++++++ 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8897afad58de9..e9b45e444d8d8 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -35,7 +35,8 @@ _infer_dtype_from_scalar, _soft_convert_objects, _possibly_convert_objects, - _astype_nansafe) + _astype_nansafe, + _find_common_type) from pandas.types.missing import (isnull, array_equivalent, _is_na_compat, is_null_datelike_scalar) @@ -4460,7 +4461,7 @@ def _interleaved_dtype(blocks): elif have_int and not have_float and not have_complex: # if we are mixing unsigned and signed, then return # the next biggest int type (if we can) - lcd = np.find_common_type([b.dtype for b in counts[IntBlock]], []) + lcd = _find_common_type([b.dtype for b in counts[IntBlock]]) kinds = set([i.dtype.kind for i in counts[IntBlock]]) if 
len(kinds) == 1: return lcd @@ -4477,7 +4478,7 @@ def _interleaved_dtype(blocks): return np.dtype('c16') else: introspection_blks = counts[FloatBlock] + counts[SparseBlock] - return np.find_common_type([b.dtype for b in introspection_blks], []) + return _find_common_type([b.dtype for b in introspection_blks]) def _consolidate(blocks): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 44e3be32c23df..66d9391d2facf 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -30,7 +30,7 @@ is_bool_dtype, is_datetimetz, is_list_like, _ensure_object) -from pandas.types.cast import _maybe_upcast_putmask +from pandas.types.cast import _maybe_upcast_putmask, _find_common_type from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex # ----------------------------------------------------------------------------- @@ -616,7 +616,7 @@ def na_op(x, y): raise_on_error=True, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): - dtype = np.find_common_type([x.dtype, y.dtype], []) + dtype = _find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) mask = notnull(x) & notnull(y) result[mask] = op(x[mask], _values_from_object(y[mask])) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index e586f68bc8daa..ef5248cfe057e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2760,10 +2760,10 @@ def test_indexing_multiple(self): tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], pd.SparseDataFrame({ - 'string': ['b', 'c'], - 'int': [2, 3], - 'float': [1.2, 1.3], - 'object': [{}, set()] + 'string': self.string_series.iloc[[1, 2]], + 'int': self.int_series.iloc[[1, 2]], + 'float': self.float_series.iloc[[1, 2]], + 'object': self.object_series.iloc[[1, 2]] }, index=[1, 2])[self.cols]) tm.assert_sp_frame_equal(self.sdf[['int', 'string']], pd.SparseDataFrame({ diff --git a/pandas/types/cast.py 
b/pandas/types/cast.py index e37b418664ba3..b7ace3e619aeb 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -861,3 +861,9 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): value = _possibly_infer_to_datetimelike(value) return value + + +def _find_common_type(types): + """Find a common data type among the given dtypes.""" + # TODO: enable using pandas specific types + return np.find_common_type(types, []) From ac790d72ba7dfba0f4c2224310e080281c413a08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 14:48:55 +0200 Subject: [PATCH 09/15] Modify .values docs to process issue #10364. --- pandas/core/generic.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17cc76e703631..d0295afe990c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2887,7 +2887,8 @@ def as_matrix(self, columns=None): e.g. If the dtypes are float16 and float32, dtype will be upcast to float32. If dtypes are int32 and uint8, dtype will be upcase to - int32. + int32. By numpy.find_common_type convention, mixing int64 and uint64 + will result in a float64 dtype. This method is provided for backwards compatibility. Generally, it is recommended to use '.values'. @@ -2913,8 +2914,9 @@ def values(self): with care if you are not dealing with the blocks. e.g. If the dtypes are float16 and float32, dtype will be upcast to - float32. If dtypes are int32 and uint8, dtype will be upcase to - int32. + float32. If dtypes are int32 and uint8, dtype will be upcast to + int32. By numpy.find_common_type convention, mixing int64 and uint64 + will result in a float64 dtype. 
""" return self.as_matrix() From eebcb23a40fa23ddf8bf6ef59d780406b5e6cfe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 14:52:01 +0200 Subject: [PATCH 10/15] Moved multitype tests to sparse/tests/test_multitype.py --- pandas/sparse/tests/test_multitype.py | 88 +++++++++++++++++++++++++++ pandas/tests/frame/test_indexing.py | 59 ------------------ pandas/tests/series/test_indexing.py | 26 -------- 3 files changed, 88 insertions(+), 85 deletions(-) create mode 100644 pandas/sparse/tests/test_multitype.py diff --git a/pandas/sparse/tests/test_multitype.py b/pandas/sparse/tests/test_multitype.py new file mode 100644 index 0000000000000..e6a59a5ae7f32 --- /dev/null +++ b/pandas/sparse/tests/test_multitype.py @@ -0,0 +1,88 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestSparseDataFrameMultitype(tm.TestCase): + def setUp(self): + super(TestSparseDataFrameMultitype, self).setUp() + self.string_series = pd.SparseSeries(['a', 'b', 'c']) + self.int_series = pd.SparseSeries([1, 2, 3]) + self.float_series = pd.SparseSeries([1.1, 1.2, 1.3]) + self.object_series = pd.SparseSeries([[], {}, set()]) + self.sdf = pd.SparseDataFrame({ + 'string': self.string_series, + 'int': self.int_series, + 'float': self.float_series, + 'object': self.object_series, + }) + self.cols = ['string', 'int', 'float', 'object'] + self.sdf = self.sdf[self.cols] + + def test_basic_dtypes(self): + for _, row in self.sdf.iterrows(): + self.assertEqual(row.dtype, object) + tm.assert_sp_series_equal(self.sdf['string'], self.string_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['int'], self.int_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['float'], self.float_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['object'], self.object_series, + check_names=False) + + def test_indexing_single(self): + tm.assert_sp_series_equal(self.sdf.iloc[0], + pd.SparseSeries(['a', 1, 1.1, 
[]], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[1], + pd.SparseSeries(['b', 2, 1.2, {}], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[2], + pd.SparseSeries(['c', 3, 1.3, set()], + index=self.cols), + check_names=False) + + def test_indexing_multiple(self): + tm.assert_sp_frame_equal(self.sdf, self.sdf[:]) + tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) + tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], + pd.SparseDataFrame({ + 'string': self.string_series.iloc[[1, 2]], + 'int': self.int_series.iloc[[1, 2]], + 'float': self.float_series.iloc[[1, 2]], + 'object': self.object_series.iloc[[1, 2]] + }, index=[1, 2])[self.cols]) + tm.assert_sp_frame_equal(self.sdf[['int', 'string']], + pd.SparseDataFrame({ + 'int': self.int_series, + 'string': self.string_series, + })) + + +class TestSparseSeriesMultitype(tm.TestCase): + def setUp(self): + super(TestSparseSeriesMultitype, self).setUp() + self.index = ['string', 'int', 'float', 'object'] + self.ss = pd.SparseSeries(['a', 1, 1.1, []], + index=self.index) + + def test_indexing_single(self): + for i, idx in enumerate(self.index): + self.assertEqual(self.ss.iloc[i], self.ss[idx]) + self.assertEqual(type(self.ss.iloc[i]), + type(self.ss[idx])) + self.assertEqual(self.ss['string'], 'a') + self.assertEqual(self.ss['int'], 1) + self.assertEqual(self.ss['float'], 1.1) + self.assertEqual(self.ss['object'], []) + + def test_indexing_multiple(self): + tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], + pd.SparseSeries(['a', 1], + index=['string', 'int'])) + tm.assert_sp_series_equal(self.ss.loc[['string', 'object']], + pd.SparseSeries(['a', []], + index=['string', 'object'])) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ef5248cfe057e..578df5ba9101e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2713,65 +2713,6 @@ def test_type_error_multiindex(self): 
assert_series_equal(result, expected) -class TestSparseDataFrameMultitype(tm.TestCase): - def setUp(self): - super(TestSparseDataFrameMultitype, self).setUp() - self.string_series = pd.SparseSeries(['a', 'b', 'c']) - self.int_series = pd.SparseSeries([1, 2, 3]) - self.float_series = pd.SparseSeries([1.1, 1.2, 1.3]) - self.object_series = pd.SparseSeries([[], {}, set()]) - self.sdf = pd.SparseDataFrame({ - 'string': self.string_series, - 'int': self.int_series, - 'float': self.float_series, - 'object': self.object_series, - }) - self.cols = ['string', 'int', 'float', 'object'] - self.sdf = self.sdf[self.cols] - - def test_basic_dtypes(self): - for _, row in self.sdf.iterrows(): - self.assertEqual(row.dtype, object) - tm.assert_sp_series_equal(self.sdf['string'], self.string_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['int'], self.int_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['float'], self.float_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['object'], self.object_series, - check_names=False) - - def test_indexing_single(self): - tm.assert_sp_series_equal(self.sdf.iloc[0], - pd.SparseSeries(['a', 1, 1.1, []], - index=self.cols), - check_names=False) - tm.assert_sp_series_equal(self.sdf.iloc[1], - pd.SparseSeries(['b', 2, 1.2, {}], - index=self.cols), - check_names=False) - tm.assert_sp_series_equal(self.sdf.iloc[2], - pd.SparseSeries(['c', 3, 1.3, set()], - index=self.cols), - check_names=False) - - def test_indexing_multiple(self): - tm.assert_sp_frame_equal(self.sdf, self.sdf[:]) - tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) - tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], - pd.SparseDataFrame({ - 'string': self.string_series.iloc[[1, 2]], - 'int': self.int_series.iloc[[1, 2]], - 'float': self.float_series.iloc[[1, 2]], - 'object': self.object_series.iloc[[1, 2]] - }, index=[1, 2])[self.cols]) - tm.assert_sp_frame_equal(self.sdf[['int', 'string']], - pd.SparseDataFrame({ - 'int': 
self.int_series, - 'string': self.string_series, - })) - - class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): _multiprocess_can_split_ = True diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 35235dbcaa246..64ebaa63cc10f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1856,29 +1856,3 @@ def test_multilevel_preserve_name(self): result2 = s.ix['foo'] self.assertEqual(result.name, s.name) self.assertEqual(result2.name, s.name) - - -class TestSparseSeriesMultitype(tm.TestCase): - def setUp(self): - super(TestSparseSeriesMultitype, self).setUp() - self.index = ['string', 'int', 'float', 'object'] - self.ss = pd.SparseSeries(['a', 1, 1.1, []], - index=self.index) - - def test_indexing_single(self): - for i, idx in enumerate(self.index): - self.assertEqual(self.ss.iloc[i], self.ss[idx]) - self.assertEqual(type(self.ss.iloc[i]), - type(self.ss[idx])) - self.assertEqual(self.ss['string'], 'a') - self.assertEqual(self.ss['int'], 1) - self.assertEqual(self.ss['float'], 1.1) - self.assertEqual(self.ss['object'], []) - - def test_indexing_multiple(self): - tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], - pd.SparseSeries(['a', 1], - index=['string', 'int'])) - tm.assert_sp_series_equal(self.ss.loc[['string', 'object']], - pd.SparseSeries(['a', []], - index=['string', 'object'])) From 8d675adf9304b98905872c0bb769d6b12b350306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 15:14:02 +0200 Subject: [PATCH 11/15] Add tests for common dtypes, raises check for pandas ones. 
--- pandas/tests/types/test_cast.py | 41 ++++++++++++++++++++++++++++++++- pandas/types/cast.py | 7 +++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index dd3f07ea8157f..a8fd1199aa051 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -15,7 +15,10 @@ _possibly_convert_objects, _infer_dtype_from_scalar, _maybe_convert_string_to_object, - _maybe_convert_scalar) + _maybe_convert_scalar, + _find_common_type) +from pandas.types.dtypes import (CategoricalDtype, + DatetimeTZDtype) from pandas.util import testing as tm _multiprocess_can_split_ = True @@ -188,6 +191,42 @@ def test_possibly_convert_objects_copy(self): self.assertTrue(values is not out) +class TestCommonTypes(tm.TestCase): + def setUp(self): + super(TestCommonTypes, self).setUp() + + def test_numpy_dtypes(self): + self.assertEqual(_find_common_type([np.int64]), np.int64) + self.assertEqual(_find_common_type([np.uint64]), np.uint64) + self.assertEqual(_find_common_type([np.float32]), np.float32) + self.assertEqual(_find_common_type([np.object]), np.object) + + self.assertEqual(_find_common_type([np.int16, np.int64]), + np.int64) + self.assertEqual(_find_common_type([np.int32, np.uint32]), + np.int64) + self.assertEqual(_find_common_type([np.object, np.float32]), + np.object) + self.assertEqual(_find_common_type([np.object, np.int16]), + np.object) + self.assertEqual(_find_common_type([np.int16, np.float64]), + np.float64) + self.assertEqual(_find_common_type([np.float16, np.int16]), + np.float32) + self.assertEqual(_find_common_type([np.float16, np.int64]), + np.float64) + self.assertEqual(_find_common_type([np.complex128, np.int32]), + np.complex128) + self.assertEqual(_find_common_type([np.uint64, np.int64]), + np.float64) + + def test_pandas_dtypes(self): + with self.assertRaises(TypeError): + self.assertEqual(_find_common_type([CategoricalDtype]), + CategoricalDtype) + 
self.assertEqual(_find_common_type([DatetimeTZDtype]), + DatetimeTZDtype) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index b7ace3e619aeb..19d10bd54fa79 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -19,6 +19,7 @@ _ensure_int32, _ensure_int64, _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, _DATELIKE_DTYPES, _POSSIBLY_CAST_DTYPES) +from .dtypes import ExtensionDtype from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries from .missing import isnull, notnull from .inference import is_list_like @@ -865,5 +866,9 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): def _find_common_type(types): """Find a common data type among the given dtypes.""" - # TODO: enable using pandas specific types + # TODO: enable using pandas-specific types + if any(issubclass(t, ExtensionDtype) or isinstance(t, ExtensionDtype) + for t in types): + raise TypeError("Common type discovery is currently only " + "supported for pure numpy dtypes.") return np.find_common_type(types, []) From 442b8c1fd0d62f1b59910b805fac25b8cb0b3945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 15:32:00 +0200 Subject: [PATCH 12/15] Whatsnew, issue tag, test reordering. 
--- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/tests/frame/test_block_internals.py | 1 + pandas/tests/types/test_cast.py | 31 ++++++++++++++-------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 9c3088f4e823b..554636f6d79b1 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -437,6 +437,7 @@ API changes - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) - ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`) +- ``.values`` will now return ``np.float64`` with a ``DataFrame`` with ``np.int64`` and ``np.uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8d41e197b401f..e51cc0f5a6ec7 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -104,6 +104,7 @@ def test_as_matrix_lcd(self): values = self.mixed_float.as_matrix(['C']) self.assertEqual(values.dtype, np.float16) + # GH 10364 # B uint64 forces float because there are other signed int types values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D']) self.assertEqual(values.dtype, np.float64) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index a8fd1199aa051..8d1bbfe1c1b93 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -192,33 +192,42 @@ def test_possibly_convert_objects_copy(self): class TestCommonTypes(tm.TestCase): - def setUp(self): - super(TestCommonTypes, self).setUp() - def 
test_numpy_dtypes(self): + # identity self.assertEqual(_find_common_type([np.int64]), np.int64) self.assertEqual(_find_common_type([np.uint64]), np.uint64) self.assertEqual(_find_common_type([np.float32]), np.float32) self.assertEqual(_find_common_type([np.object]), np.object) + # into ints self.assertEqual(_find_common_type([np.int16, np.int64]), np.int64) self.assertEqual(_find_common_type([np.int32, np.uint32]), np.int64) - self.assertEqual(_find_common_type([np.object, np.float32]), - np.object) - self.assertEqual(_find_common_type([np.object, np.int16]), - np.object) - self.assertEqual(_find_common_type([np.int16, np.float64]), - np.float64) + self.assertEqual(_find_common_type([np.uint16, np.uint64]), + np.uint64) + + # into floats + self.assertEqual(_find_common_type([np.float16, np.float32]), + np.float32) self.assertEqual(_find_common_type([np.float16, np.int16]), np.float32) + self.assertEqual(_find_common_type([np.float32, np.int16]), + np.float32) + self.assertEqual(_find_common_type([np.uint64, np.int64]), + np.float64) + self.assertEqual(_find_common_type([np.int16, np.float64]), + np.float64) self.assertEqual(_find_common_type([np.float16, np.int64]), np.float64) + + # into others self.assertEqual(_find_common_type([np.complex128, np.int32]), np.complex128) - self.assertEqual(_find_common_type([np.uint64, np.int64]), - np.float64) + self.assertEqual(_find_common_type([np.object, np.float32]), + np.object) + self.assertEqual(_find_common_type([np.object, np.int16]), + np.object) def test_pandas_dtypes(self): with self.assertRaises(TypeError): From 926ca1e1e7cab91e064b688117b64f3f0767a102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Tue, 9 Aug 2016 18:13:52 +0200 Subject: [PATCH 13/15] Fix a derp. 
--- pandas/tests/types/test_cast.py | 5 +++-- pandas/types/cast.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 8d1bbfe1c1b93..f47a96a58cafc 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -231,9 +231,10 @@ def test_numpy_dtypes(self): def test_pandas_dtypes(self): with self.assertRaises(TypeError): - self.assertEqual(_find_common_type([CategoricalDtype]), + self.assertEqual(_find_common_type([CategoricalDtype()]), CategoricalDtype) - self.assertEqual(_find_common_type([DatetimeTZDtype]), + with self.assertRaises(TypeError): + self.assertEqual(_find_common_type([DatetimeTZDtype()]), DatetimeTZDtype) if __name__ == '__main__': diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 19d10bd54fa79..93be926fe1eeb 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -867,8 +867,7 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): def _find_common_type(types): """Find a common data type among the given dtypes.""" # TODO: enable using pandas-specific types - if any(issubclass(t, ExtensionDtype) or isinstance(t, ExtensionDtype) - for t in types): + if any(isinstance(t, ExtensionDtype) for t in types): raise TypeError("Common type discovery is currently only " "supported for pure numpy dtypes.") return np.find_common_type(types, []) From 057d56b096369d12658d7e858226e83a97b19c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Wed, 10 Aug 2016 00:20:01 +0200 Subject: [PATCH 14/15] Wording and code organization fixes. 
--- doc/source/whatsnew/v0.19.0.txt | 4 +- pandas/sparse/tests/test_indexing.py | 78 ++++++++++++++++++++++++ pandas/sparse/tests/test_multitype.py | 88 --------------------------- pandas/tests/types/test_cast.py | 64 +++++++++---------- 4 files changed, 109 insertions(+), 125 deletions(-) delete mode 100644 pandas/sparse/tests/test_multitype.py diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 554636f6d79b1..76578f77d6896 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -437,7 +437,7 @@ API changes - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) - ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. 
(:issue:`13623`) -- ``.values`` will now return ``np.float64`` with a ``DataFrame`` with ``np.int64`` and ``np.uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`) +- ``DataFrame.values`` will now return ``float64`` with a ``DataFrame`` of mixed ``int64`` and ``uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`) @@ -765,7 +765,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` - Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) - Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) - Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) -- Bug when interacting with multi-type SparseDataFrames: single row slicing now works because types are not forced to float (:issue:`13917`) +- Bug in single row slicing on multi-type ``SparseDataFrame``s: types were previously forced to float (:issue:`13917`) .. 
_whatsnew_0190.deprecations: diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index 1f88d22bd8f93..74c3785b06d77 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -829,3 +829,81 @@ def test_reindex_fill_value(self): res = sparse.reindex(['A', 'C', 'B']) exp = orig.reindex(['A', 'C', 'B']).to_sparse(fill_value=0) tm.assert_sp_frame_equal(res, exp) + + +class TestMultitype(tm.TestCase): + def setUp(self): + self.cols = ['string', 'int', 'float', 'object'] + + self.string_series = pd.SparseSeries(['a', 'b', 'c']) + self.int_series = pd.SparseSeries([1, 2, 3]) + self.float_series = pd.SparseSeries([1.1, 1.2, 1.3]) + self.object_series = pd.SparseSeries([[], {}, set()]) + self.sdf = pd.SparseDataFrame({ + 'string': self.string_series, + 'int': self.int_series, + 'float': self.float_series, + 'object': self.object_series, + }) + self.sdf = self.sdf[self.cols] + self.ss = pd.SparseSeries(['a', 1, 1.1, []], index=self.cols) + + def test_frame_basic_dtypes(self): + for _, row in self.sdf.iterrows(): + self.assertEqual(row.dtype, object) + tm.assert_sp_series_equal(self.sdf['string'], self.string_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['int'], self.int_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['float'], self.float_series, + check_names=False) + tm.assert_sp_series_equal(self.sdf['object'], self.object_series, + check_names=False) + + def test_frame_indexing_single(self): + tm.assert_sp_series_equal(self.sdf.iloc[0], + pd.SparseSeries(['a', 1, 1.1, []], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[1], + pd.SparseSeries(['b', 2, 1.2, {}], + index=self.cols), + check_names=False) + tm.assert_sp_series_equal(self.sdf.iloc[2], + pd.SparseSeries(['c', 3, 1.3, set()], + index=self.cols), + check_names=False) + + def test_frame_indexing_multiple(self): + tm.assert_sp_frame_equal(self.sdf, self.sdf[:]) + 
tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) + tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], + pd.SparseDataFrame({ + 'string': self.string_series.iloc[[1, 2]], + 'int': self.int_series.iloc[[1, 2]], + 'float': self.float_series.iloc[[1, 2]], + 'object': self.object_series.iloc[[1, 2]] + }, index=[1, 2])[self.cols]) + tm.assert_sp_frame_equal(self.sdf[['int', 'string']], + pd.SparseDataFrame({ + 'int': self.int_series, + 'string': self.string_series, + })) + + def test_series_indexing_single(self): + for i, idx in enumerate(self.cols): + self.assertEqual(self.ss.iloc[i], self.ss[idx]) + self.assertEqual(type(self.ss.iloc[i]), + type(self.ss[idx])) + self.assertEqual(self.ss['string'], 'a') + self.assertEqual(self.ss['int'], 1) + self.assertEqual(self.ss['float'], 1.1) + self.assertEqual(self.ss['object'], []) + + def test_series_indexing_multiple(self): + tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], + pd.SparseSeries(['a', 1], + index=['string', 'int'])) + tm.assert_sp_series_equal(self.ss.loc[['string', 'object']], + pd.SparseSeries(['a', []], + index=['string', 'object'])) diff --git a/pandas/sparse/tests/test_multitype.py b/pandas/sparse/tests/test_multitype.py deleted file mode 100644 index e6a59a5ae7f32..0000000000000 --- a/pandas/sparse/tests/test_multitype.py +++ /dev/null @@ -1,88 +0,0 @@ -import numpy as np -import pandas as pd -import pandas.util.testing as tm - - -class TestSparseDataFrameMultitype(tm.TestCase): - def setUp(self): - super(TestSparseDataFrameMultitype, self).setUp() - self.string_series = pd.SparseSeries(['a', 'b', 'c']) - self.int_series = pd.SparseSeries([1, 2, 3]) - self.float_series = pd.SparseSeries([1.1, 1.2, 1.3]) - self.object_series = pd.SparseSeries([[], {}, set()]) - self.sdf = pd.SparseDataFrame({ - 'string': self.string_series, - 'int': self.int_series, - 'float': self.float_series, - 'object': self.object_series, - }) - self.cols = ['string', 'int', 'float', 'object'] - self.sdf = self.sdf[self.cols] - 
- def test_basic_dtypes(self): - for _, row in self.sdf.iterrows(): - self.assertEqual(row.dtype, object) - tm.assert_sp_series_equal(self.sdf['string'], self.string_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['int'], self.int_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['float'], self.float_series, - check_names=False) - tm.assert_sp_series_equal(self.sdf['object'], self.object_series, - check_names=False) - - def test_indexing_single(self): - tm.assert_sp_series_equal(self.sdf.iloc[0], - pd.SparseSeries(['a', 1, 1.1, []], - index=self.cols), - check_names=False) - tm.assert_sp_series_equal(self.sdf.iloc[1], - pd.SparseSeries(['b', 2, 1.2, {}], - index=self.cols), - check_names=False) - tm.assert_sp_series_equal(self.sdf.iloc[2], - pd.SparseSeries(['c', 3, 1.3, set()], - index=self.cols), - check_names=False) - - def test_indexing_multiple(self): - tm.assert_sp_frame_equal(self.sdf, self.sdf[:]) - tm.assert_sp_frame_equal(self.sdf, self.sdf.loc[:]) - tm.assert_sp_frame_equal(self.sdf.iloc[[1, 2]], - pd.SparseDataFrame({ - 'string': self.string_series.iloc[[1, 2]], - 'int': self.int_series.iloc[[1, 2]], - 'float': self.float_series.iloc[[1, 2]], - 'object': self.object_series.iloc[[1, 2]] - }, index=[1, 2])[self.cols]) - tm.assert_sp_frame_equal(self.sdf[['int', 'string']], - pd.SparseDataFrame({ - 'int': self.int_series, - 'string': self.string_series, - })) - - -class TestSparseSeriesMultitype(tm.TestCase): - def setUp(self): - super(TestSparseSeriesMultitype, self).setUp() - self.index = ['string', 'int', 'float', 'object'] - self.ss = pd.SparseSeries(['a', 1, 1.1, []], - index=self.index) - - def test_indexing_single(self): - for i, idx in enumerate(self.index): - self.assertEqual(self.ss.iloc[i], self.ss[idx]) - self.assertEqual(type(self.ss.iloc[i]), - type(self.ss[idx])) - self.assertEqual(self.ss['string'], 'a') - self.assertEqual(self.ss['int'], 1) - self.assertEqual(self.ss['float'], 1.1) - 
self.assertEqual(self.ss['object'], []) - - def test_indexing_multiple(self): - tm.assert_sp_series_equal(self.ss.loc[['string', 'int']], - pd.SparseSeries(['a', 1], - index=['string', 'int'])) - tm.assert_sp_series_equal(self.ss.loc[['string', 'object']], - pd.SparseSeries(['a', []], - index=['string', 'object'])) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index f47a96a58cafc..3394974d833fb 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -193,43 +193,37 @@ def test_possibly_convert_objects_copy(self): class TestCommonTypes(tm.TestCase): def test_numpy_dtypes(self): - # identity - self.assertEqual(_find_common_type([np.int64]), np.int64) - self.assertEqual(_find_common_type([np.uint64]), np.uint64) - self.assertEqual(_find_common_type([np.float32]), np.float32) - self.assertEqual(_find_common_type([np.object]), np.object) - - # into ints - self.assertEqual(_find_common_type([np.int16, np.int64]), - np.int64) - self.assertEqual(_find_common_type([np.int32, np.uint32]), - np.int64) - self.assertEqual(_find_common_type([np.uint16, np.uint64]), - np.uint64) - - # into floats - self.assertEqual(_find_common_type([np.float16, np.float32]), - np.float32) - self.assertEqual(_find_common_type([np.float16, np.int16]), - np.float32) - self.assertEqual(_find_common_type([np.float32, np.int16]), - np.float32) - self.assertEqual(_find_common_type([np.uint64, np.int64]), - np.float64) - self.assertEqual(_find_common_type([np.int16, np.float64]), - np.float64) - self.assertEqual(_find_common_type([np.float16, np.int64]), - np.float64) - - # into others - self.assertEqual(_find_common_type([np.complex128, np.int32]), - np.complex128) - self.assertEqual(_find_common_type([np.object, np.float32]), - np.object) - self.assertEqual(_find_common_type([np.object, np.int16]), - np.object) + # (source_types, destination_type) + testcases = ( + # identity + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + 
((np.float32,), np.float32), + ((np.object,), np.object), + + # into ints + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + + # into floats + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + + # into others + ((np.complex128, np.int32), np.complex128), + ((np.object, np.float32), np.object), + ((np.object, np.int16), np.object), + ) + for src, common in testcases: + self.assertEqual(_find_common_type(src), common) def test_pandas_dtypes(self): + # TODO: not implemented yet with self.assertRaises(TypeError): self.assertEqual(_find_common_type([CategoricalDtype()]), CategoricalDtype) From 8c7d1ea7660bbf1e9351239abf39ac6c7a30a216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Wed, 10 Aug 2016 00:31:37 +0200 Subject: [PATCH 15/15] Colon to comma. --- doc/source/whatsnew/v0.19.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 76578f77d6896..f8ce963253887 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -765,7 +765,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` - Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) - Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) - Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) -- Bug in single row slicing on multi-type ``SparseDataFrame``s: types were previously forced to float (:issue:`13917`) +- Bug in single row slicing on multi-type ``SparseDataFrame``s, types were previously forced to float (:issue:`13917`) .. 
_whatsnew_0190.deprecations: