Skip to content

Commit 2392e7c

Browse files
committed
Move dtype handling to algorithm
1 parent 2f46f73 commit 2392e7c

File tree

3 files changed

+54
-34
lines changed

3 files changed

+54
-34
lines changed

pandas/core/algorithms.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import pandas.core.common as com
1111
import pandas.algos as algos
1212
import pandas.hashtable as htable
13+
from pandas.types import api as gt
1314
from pandas.compat import string_types
1415
from pandas.tslib import iNaT
1516

@@ -253,27 +254,28 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
253254
254255
"""
255256
from pandas.core.series import Series
256-
from pandas.tools.tile import cut
257-
258257
name = getattr(values, 'name', None)
259-
values = Series(values).values
260258

261259
if bins is not None:
262260
try:
261+
from pandas.tools.tile import cut
262+
values = Series(values).values
263263
cat, bins = cut(values, bins, retbins=True)
264264
except TypeError:
265265
raise TypeError("bins argument only works with numeric data.")
266266
values = cat.codes
267267

268-
if com.is_extension_type(values):
269-
result = values.value_counts(dropna=dropna)
268+
if com.is_extension_type(values) and not com.is_datetimetz(values):
269+
# handle Categorical and sparse,
270+
# datetime tz can be handled in ndarray path
271+
result = Series(values).values.value_counts(dropna=dropna)
270272
result.name = name
271273
counts = result.values
272274
else:
273-
# ndarray path
275+
# ndarray path. pass original to handle DatetimeTzBlock
274276
keys, counts = _value_counts_arraylike(values, dropna=dropna)
275277

276-
from pandas import Index
278+
from pandas import Index, Series
277279
if not isinstance(keys, Index):
278280
keys = Index(keys)
279281
result = Series(counts, index=keys, name=name)
@@ -294,20 +296,23 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
294296

295297

296298
def _value_counts_arraylike(values, dropna=True):
297-
from pandas import PeriodIndex, DatetimeIndex
299+
is_datetimetz = com.is_datetimetz(values)
300+
is_period = (isinstance(values, gt.ABCPeriodIndex) or
301+
com.is_period_arraylike(values))
302+
303+
orig = values
298304

305+
from pandas.core.series import Series
306+
values = Series(values).values
299307
dtype = values.dtype
300-
is_period = com.is_period_arraylike(values)
301-
is_datetimetz = com.is_datetimetz(values)
302308

303-
if com.is_datetime_or_timedelta_dtype(dtype) or is_period or \
304-
is_datetimetz:
309+
if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
310+
from pandas.tseries.index import DatetimeIndex
311+
from pandas.tseries.period import PeriodIndex
305312

306313
if is_period:
307314
values = PeriodIndex(values)
308-
elif is_datetimetz:
309-
tz = getattr(values, 'tz', None)
310-
values = DatetimeIndex(values).tz_localize(None)
315+
freq = values.freq
311316

312317
values = values.view(np.int64)
313318
keys, counts = htable.value_count_scalar64(values, dropna)
@@ -316,21 +321,25 @@ def _value_counts_arraylike(values, dropna=True):
316321
msk = keys != iNaT
317322
keys, counts = keys[msk], counts[msk]
318323

319-
# localize to the original tz if necessary
320-
if is_datetimetz:
321-
keys = DatetimeIndex(keys).tz_localize(tz)
322-
323324
# convert the keys back to the dtype we came in
324-
else:
325-
keys = keys.astype(dtype)
325+
keys = keys.astype(dtype)
326+
327+
# dtype handling
328+
if is_datetimetz:
329+
if isinstance(orig, gt.ABCDatetimeIndex):
330+
tz = orig.tz
331+
else:
332+
tz = orig.dt.tz
333+
keys = DatetimeIndex._simple_new(keys, tz=tz)
334+
if is_period:
335+
keys = PeriodIndex._simple_new(keys, freq=freq)
326336

327337
elif com.is_integer_dtype(dtype):
328338
values = com._ensure_int64(values)
329339
keys, counts = htable.value_count_scalar64(values, dropna)
330340
elif com.is_float_dtype(dtype):
331341
values = com._ensure_float64(values)
332342
keys, counts = htable.value_count_scalar64(values, dropna)
333-
334343
else:
335344
values = com._ensure_object(values)
336345
mask = com.isnull(values)

pandas/core/base.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -943,18 +943,6 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
943943
from pandas.core.algorithms import value_counts
944944
result = value_counts(self, sort=sort, ascending=ascending,
945945
normalize=normalize, bins=bins, dropna=dropna)
946-
947-
if isinstance(self, gt.ABCPeriodIndex):
948-
# preserve freq
949-
result.index = self._simple_new(result.index.values,
950-
freq=self.freq)
951-
elif com.is_datetimetz(self):
952-
if isinstance(self, gt.ABCDatetimeIndex):
953-
tz = self.tz
954-
else:
955-
tz = self.dt.tz
956-
result.index = result.index._simple_new(result.index.values,
957-
tz=tz)
958946
return result
959947

960948
def unique(self):

pandas/tests/series/test_analytics.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,29 @@ def test_value_counts_datetime_tz(self):
17281728
tm.assert_series_equal(s.value_counts(normalize=True), exp)
17291729
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
17301730

1731+
def test_value_counts_period(self):
1732+
values = [pd.Period('2011-01', freq='M'),
1733+
pd.Period('2011-02', freq='M'),
1734+
pd.Period('2011-03', freq='M'),
1735+
pd.Period('2011-01', freq='M'),
1736+
pd.Period('2011-01', freq='M'),
1737+
pd.Period('2011-03', freq='M')]
1738+
1739+
exp_idx = pd.PeriodIndex(['2011-01', '2011-03', '2011-02'], freq='M')
1740+
exp = pd.Series([3, 2, 1], index=exp_idx, name='xxx')
1741+
1742+
s = pd.Series(values, name='xxx')
1743+
tm.assert_series_equal(s.value_counts(), exp)
1744+
# check PeriodIndex outputs the same result
1745+
idx = pd.PeriodIndex(values, name='xxx')
1746+
tm.assert_series_equal(idx.value_counts(), exp)
1747+
1748+
# normalize
1749+
exp = pd.Series(np.array([3., 2., 1]) / 6.,
1750+
index=exp_idx, name='xxx')
1751+
tm.assert_series_equal(s.value_counts(normalize=True), exp)
1752+
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
1753+
17311754
def test_value_counts_categorical_ordered(self):
17321755
# most dtypes are tested in test_base.py
17331756
values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=True)

0 commit comments

Comments
 (0)