@@ -254,7 +254,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
254254 """
255255 from pandas .core .series import Series
256256 from pandas .tools .tile import cut
257- from pandas import Index , PeriodIndex , DatetimeIndex
258257
259258 name = getattr (values , 'name' , None )
260259 values = Series (values ).values
@@ -266,71 +265,81 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
266265 raise TypeError ("bins argument only works with numeric data." )
267266 values = cat .codes
268267
269- if com .is_categorical_dtype (values .dtype ):
270- result = values .value_counts (dropna )
271-
268+ if com .is_extension_type (values ):
269+ result = values .value_counts (dropna = dropna )
270+ result .name = name
271+ counts = result .values
272272 else :
273+ # ndarray path
274+ keys , counts = _value_counts_arraylike (values , dropna = dropna )
273275
274- dtype = values .dtype
275- is_period = com .is_period_arraylike (values )
276- is_datetimetz = com .is_datetimetz (values )
276+ from pandas import Index
277+ if not isinstance (keys , Index ):
278+ keys = Index (keys )
279+ result = Series (counts , index = keys , name = name )
277280
278- if com .is_datetime_or_timedelta_dtype (dtype ) or is_period or \
279- is_datetimetz :
281+ if bins is not None :
282+ # TODO: This next line should be more efficient
283+ result = result .reindex (np .arange (len (cat .categories )),
284+ fill_value = 0 )
285+ result .index = bins [:- 1 ]
280286
281- if is_period :
282- values = PeriodIndex (values )
283- elif is_datetimetz :
284- tz = getattr (values , 'tz' , None )
285- values = DatetimeIndex (values ).tz_localize (None )
287+ if sort :
288+ result = result .sort_values (ascending = ascending )
286289
287- values = values . view ( np . int64 )
288- keys , counts = htable . value_count_scalar64 ( values , dropna )
290+ if normalize :
291+ result = result / float ( counts . sum () )
289292
290- if dropna :
291- msk = keys != iNaT
292- keys , counts = keys [msk ], counts [msk ]
293+ return result
293294
294- # localize to the original tz if necessary
295- if is_datetimetz :
296- keys = DatetimeIndex (keys ).tz_localize (tz )
297295
298- # convert the keys back to the dtype we came in
299- else :
300- keys = keys .astype (dtype )
296+ def _value_counts_arraylike (values , dropna = True ):
297+ from pandas import PeriodIndex , DatetimeIndex
301298
302- elif com .is_integer_dtype (dtype ):
303- values = com ._ensure_int64 (values )
304- keys , counts = htable .value_count_scalar64 (values , dropna )
305- elif com .is_float_dtype (dtype ):
306- values = com ._ensure_float64 (values )
307- keys , counts = htable .value_count_scalar64 (values , dropna )
299+ dtype = values .dtype
300+ is_period = com .is_period_arraylike (values )
301+ is_datetimetz = com .is_datetimetz (values )
308302
309- else :
310- values = com ._ensure_object (values )
311- mask = com .isnull (values )
312- keys , counts = htable .value_count_object (values , mask )
313- if not dropna and mask .any ():
314- keys = np .insert (keys , 0 , np .NaN )
315- counts = np .insert (counts , 0 , mask .sum ())
303+ if com .is_datetime_or_timedelta_dtype (dtype ) or is_period or \
304+ is_datetimetz :
316305
317- if not isinstance (keys , Index ):
318- keys = Index (keys )
319- result = Series (counts , index = keys , name = name )
306+ if is_period :
307+ values = PeriodIndex (values )
308+ elif is_datetimetz :
309+ tz = getattr (values , 'tz' , None )
310+ values = DatetimeIndex (values ).tz_localize (None )
320311
321- if bins is not None :
322- # TODO: This next line should be more efficient
323- result = result .reindex (np .arange (len (cat .categories )),
324- fill_value = 0 )
325- result .index = bins [:- 1 ]
312+ values = values .view (np .int64 )
313+ keys , counts = htable .value_count_scalar64 (values , dropna )
326314
327- if sort :
328- result = result .sort_values (ascending = ascending )
315+ if dropna :
316+ msk = keys != iNaT
317+ keys , counts = keys [msk ], counts [msk ]
329318
330- if normalize :
331- result = result / float (counts .sum ())
319+ # localize to the original tz if necessary
320+ if is_datetimetz :
321+ keys = DatetimeIndex (keys ).tz_localize (tz )
332322
333- return result
323+ # convert the keys back to the dtype we came in
324+ else :
325+ keys = keys .astype (dtype )
326+
327+ elif com .is_integer_dtype (dtype ):
328+ values = com ._ensure_int64 (values )
329+ keys , counts = htable .value_count_scalar64 (values , dropna )
330+ elif com .is_float_dtype (dtype ):
331+ values = com ._ensure_float64 (values )
332+ keys , counts = htable .value_count_scalar64 (values , dropna )
333+
334+ else :
335+ values = com ._ensure_object (values )
336+ mask = com .isnull (values )
337+ keys , counts = htable .value_count_object (values , mask )
338+ if not dropna and mask .any ():
339+ keys = np .insert (keys , 0 , np .NaN )
340+ counts = np .insert (counts , 0 , mask .sum ())
341+
342+ return keys , counts
334343
335344
336345def mode (values ):
0 commit comments