@@ -254,7 +254,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
254
254
"""
255
255
from pandas .core .series import Series
256
256
from pandas .tools .tile import cut
257
- from pandas import Index , PeriodIndex , DatetimeIndex
258
257
259
258
name = getattr (values , 'name' , None )
260
259
values = Series (values ).values
@@ -266,71 +265,81 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
266
265
raise TypeError ("bins argument only works with numeric data." )
267
266
values = cat .codes
268
267
269
- if com .is_categorical_dtype (values .dtype ):
270
- result = values .value_counts (dropna )
271
-
268
+ if com .is_extension_type (values ):
269
+ result = values .value_counts (dropna = dropna )
270
+ result .name = name
271
+ counts = result .values
272
272
else :
273
+ # ndarray path
274
+ keys , counts = _value_counts_arraylike (values , dropna = dropna )
273
275
274
- dtype = values .dtype
275
- is_period = com .is_period_arraylike (values )
276
- is_datetimetz = com .is_datetimetz (values )
276
+ from pandas import Index
277
+ if not isinstance (keys , Index ):
278
+ keys = Index (keys )
279
+ result = Series (counts , index = keys , name = name )
277
280
278
- if com .is_datetime_or_timedelta_dtype (dtype ) or is_period or \
279
- is_datetimetz :
281
+ if bins is not None :
282
+ # TODO: This next line should be more efficient
283
+ result = result .reindex (np .arange (len (cat .categories )),
284
+ fill_value = 0 )
285
+ result .index = bins [:- 1 ]
280
286
281
- if is_period :
282
- values = PeriodIndex (values )
283
- elif is_datetimetz :
284
- tz = getattr (values , 'tz' , None )
285
- values = DatetimeIndex (values ).tz_localize (None )
287
+ if sort :
288
+ result = result .sort_values (ascending = ascending )
286
289
287
- values = values . view ( np . int64 )
288
- keys , counts = htable . value_count_scalar64 ( values , dropna )
290
+ if normalize :
291
+ result = result / float ( counts . sum () )
289
292
290
- if dropna :
291
- msk = keys != iNaT
292
- keys , counts = keys [msk ], counts [msk ]
293
+ return result
293
294
294
- # localize to the original tz if necessary
295
- if is_datetimetz :
296
- keys = DatetimeIndex (keys ).tz_localize (tz )
297
295
298
- # convert the keys back to the dtype we came in
299
- else :
300
- keys = keys .astype (dtype )
296
def _value_counts_arraylike(values, dropna=True):
    """
    Compute the distinct keys of an array-like and how often each occurs.

    Parameters
    ----------
    values : array-like
        Must expose a ``dtype`` attribute.
    dropna : boolean, default True
        Passed through to the scalar hashtable counter.  On the
        datetime-like path, keys equal to ``iNaT`` are additionally
        filtered out when True.  On the object path, a NaN key carrying
        the null count is prepended when False and nulls are present.

    Returns
    -------
    (keys, counts) : tuple
        ``keys`` holds the distinct values and ``counts`` the matching
        number of occurrences.
    """
    from pandas import PeriodIndex, DatetimeIndex

    orig_dtype = values.dtype
    period_like = com.is_period_arraylike(values)
    tz_aware = com.is_datetimetz(values)
    datetime_like = (com.is_datetime_or_timedelta_dtype(orig_dtype) or
                     period_like or tz_aware)

    if datetime_like:
        # Datetime-likes are counted through their int64 view.
        if period_like:
            values = PeriodIndex(values)
        elif tz_aware:
            # Remember the zone so it can be re-applied to the keys below.
            tz = getattr(values, 'tz', None)
            values = DatetimeIndex(values).tz_localize(None)

        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            keep = keys != iNaT
            keys = keys[keep]
            counts = counts[keep]

        if tz_aware:
            # localize to the original tz
            keys = DatetimeIndex(keys).tz_localize(tz)
        else:
            # convert the keys back to the dtype we came in with
            keys = keys.astype(orig_dtype)
        return keys, counts

    if com.is_integer_dtype(orig_dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
        return keys, counts

    if com.is_float_dtype(orig_dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
        return keys, counts

    # Everything else is counted as objects, with nulls tracked by a mask.
    values = com._ensure_object(values)
    null_mask = com.isnull(values)
    keys, counts = htable.value_count_object(values, null_mask)
    if not dropna and null_mask.any():
        # Nulls get a leading NaN key carrying their count.
        keys = np.insert(keys, 0, np.NaN)
        counts = np.insert(counts, 0, null_mask.sum())
    return keys, counts
334
343
335
344
336
345
def mode (values ):
0 commit comments