diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index d572835a76218..1b52ee2d08370 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -45,8 +45,12 @@ Performance .. _whatsnew_0160.performance: + - Fixed a severe performance regression for ``.loc`` indexing with an array or list (:issue:9126:). +- Improved the speed of ``nunique`` by calling ``unique`` instead of ``value_counts`` (:issue:`9129`, :issue:`7771`) + + Bug Fixes ~~~~~~~~~ @@ -114,3 +118,4 @@ Bug Fixes - DataFrame now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`) - Bug in read_csv when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`) +- isnull now detects NaT in PeriodIndex (:issue:`9129`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 04b431ae8cf67..c3b3024a16d0c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -441,7 +441,12 @@ def nunique(self, dropna=True): ------- nunique : int """ - return len(self.value_counts(dropna=dropna)) + uniqs = self.unique() + n = len(uniqs) + if dropna and com.isnull(uniqs).any(): + n -= 1 + return n + def factorize(self, sort=False, na_sentinel=-1): """ diff --git a/pandas/core/common.py b/pandas/core/common.py index e5ff353104fe9..143f65ee64e60 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -302,7 +302,7 @@ def _isnull_ndarraylike(obj): vec = lib.isnullobj(values.ravel()) result[...] 
= vec.reshape(shape) - elif dtype in _DATELIKE_DTYPES: + elif is_datetimelike(obj): # this is the NaT pattern result = values.view('i8') == tslib.iNaT else: @@ -2366,6 +2366,9 @@ def is_datetime_arraylike(arr): return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' return getattr(arr, 'inferred_type', None) == 'datetime' +def is_datetimelike(arr): + return arr.dtype in _DATELIKE_DTYPES or isinstance(arr, ABCPeriodIndex) + def _coerce_to_dtype(dtype): """ coerce a string / np.dtype to a dtype """ if is_categorical_dtype(dtype): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 6c64cbc08ca63..2f57fa593bc40 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -157,6 +157,15 @@ def test_isnull_datetime(): assert(mask[0]) assert(not mask[1:].any()) + # GH 9129 + pidx = idx.to_period(freq='M') + mask = isnull(pidx) + assert(mask[0]) + assert(not mask[1:].any()) + + mask = isnull(pidx[1:]) + assert(not mask.any()) + class TestIsNull(tm.TestCase): def test_0d_array(self):