Skip to content

Commit 44f61ba

Browse files
committed
DEPR: deprecate relabeling dictionaries in groupby.agg
1 parent fbbcc10 commit 44f61ba

File tree

8 files changed

+336
-84
lines changed

8 files changed

+336
-84
lines changed

doc/source/whatsnew/v0.20.0.txt

+74
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi
428428
df.iloc[[0, 2], df.columns.get_loc('A')]
429429

430430

431+
<<<<<<< c25fbde09272f369f280212e5216441d5975687c
431432
.. _whatsnew_0200.api_breaking.deprecate_panel:
432433

433434
Deprecate Panel
@@ -455,6 +456,79 @@ Convert to an xarray DataArray
455456

456457
p.to_xarray()
457458

459+
.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict:
460+
461+
Deprecate groupby.agg() with a dictionary when renaming
462+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
463+
464+
The ``.groupby(..).agg(..)`` syntax can accept a variety of inputs, including scalars, lists, and a dictionary of column names to scalars or lists.
465+
This provides a useful syntax for constructing multiple (potentially different) aggregations for a groupby.
466+
467+
1) We are deprecating passing a dictionary to a grouped ``Series``. This allowed one to ``rename`` the resulting aggregation, but this had a completely different
468+
meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations.
469+
2) We are deprecating passing a dict-of-dict to a grouped ``DataFrame`` in a similar manner.
470+
471+
Here's an example of 1), passing a dict to a grouped ``Series``:
472+
473+
.. ipython:: python
474+
475+
df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
476+
'B': range(5),
477+
'C':range(5)})
478+
df
479+
480+
Aggregating a DataFrame with column selection.
481+
482+
.. ipython:: python
483+
484+
df.groupby('A').agg({'B': ['sum', 'max'],
485+
'C': ['count', 'min']})
486+
487+
488+
We are deprecating the following, which is a combination of aggregation & renaming:
489+
490+
.. code-block:: ipython
491+
492+
In [6]: df.groupby('A').B.agg({'foo': 'count'})
493+
FutureWarning: using a dictionary on a Series for aggregation
494+
is deprecated and will be removed in a future version
495+
496+
Out[6]:
497+
foo
498+
A
499+
1 3
500+
2 2
501+
502+
You can accomplish the same operation more idiomatically by:
503+
504+
.. ipython:: python
505+
506+
df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'})
507+
508+
509+
Here's an example of 2), passing a dict-of-dict to a grouped ``DataFrame``:
510+
511+
.. code-block:: python
512+
513+
In [23]: df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, 'C': {'bar': ['count', 'min']}})
514+
FutureWarning: using a dict with renaming
515+
is deprecated and will be removed in a future version
516+
517+
Out[23]:
518+
foo bar
519+
sum max count min
520+
A
521+
1 3 2 3 0
522+
2 7 4 2 3
523+
524+
You can accomplish the same by:
525+
526+
.. ipython:: python
527+
528+
r = df.groupby('A').agg({'B': ['sum', 'max'], 'C': ['count', 'min']})
529+
r.columns = r.columns.set_levels(['foo', 'bar'], level=0)
530+
r
531+
458532
.. _whatsnew.api_breaking.io_compat:
459533

460534
Possible incompat for HDF5 formats for pandas < 0.13.0

pandas/core/base.py

+118-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Base and utility classes for pandas objects.
33
"""
4+
import warnings
45
from pandas import compat
56
from pandas.compat import builtins
67
import numpy as np
@@ -290,7 +291,9 @@ class SelectionMixin(object):
290291
}
291292

292293
@property
293-
def name(self):
294+
def _selection_name(self):
295+
""" return a name for myself; this would ideally be the 'name' property, but
296+
we cannot conflict with the Series.name property which can be set """
294297
if self._selection is None:
295298
return None # 'result'
296299
else:
@@ -405,6 +408,26 @@ def aggregate(self, func, *args, **kwargs):
405408

406409
agg = aggregate
407410

411+
def _try_aggregate_string_function(self, arg, *args, **kwargs):
412+
"""
413+
if arg is a string, then try to operate on it:
414+
- try to find a function on ourselves
415+
- try to find a numpy function
416+
- raise
417+
418+
"""
419+
assert isinstance(arg, compat.string_types)
420+
421+
f = getattr(self, arg, None)
422+
if f is not None:
423+
return f(*args, **kwargs)
424+
425+
f = getattr(np, arg, None)
426+
if f is not None:
427+
return f(self, *args, **kwargs)
428+
429+
raise ValueError("{} is an unknown string function".format(arg))
430+
408431
def _aggregate(self, arg, *args, **kwargs):
409432
"""
410433
provide an implementation for the aggregators
@@ -428,14 +451,19 @@ def _aggregate(self, arg, *args, **kwargs):
428451
is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
429452
is_nested_renamer = False
430453

454+
_axis = kwargs.pop('_axis', None)
455+
if _axis is None:
456+
_axis = getattr(self, 'axis', 0)
431457
_level = kwargs.pop('_level', None)
458+
432459
if isinstance(arg, compat.string_types):
433-
return getattr(self, arg)(*args, **kwargs), None
460+
return self._try_aggregate_string_function(arg, *args,
461+
**kwargs), None
434462

435463
if isinstance(arg, dict):
436464

437465
# aggregate based on the passed dict
438-
if self.axis != 0: # pragma: no cover
466+
if _axis != 0: # pragma: no cover
439467
raise ValueError('Can only pass dict with axis=0')
440468

441469
obj = self._selected_obj
@@ -505,6 +533,16 @@ def _agg(arg, func):
505533
keys = list(compat.iterkeys(arg))
506534
result = compat.OrderedDict()
507535

536+
# renaming keys
537+
if isinstance(self._selected_obj, ABCDataFrame):
538+
if len(self._selected_obj.columns.intersection(
539+
keys)) != len(keys):
540+
warnings.warn(
541+
("using a dict with renaming "
542+
"is deprecated and will be removed in a future "
543+
"version"),
544+
FutureWarning, stacklevel=3)
545+
508546
# nested renamer
509547
if is_nested_renamer:
510548
result = list(_agg(arg, _agg_1dim).values())
@@ -534,7 +572,7 @@ def _agg(arg, func):
534572
agg_how: _agg_1dim(self._selection, agg_how))
535573

536574
# we are selecting the same set as we are aggregating
537-
elif not len(sl - set(compat.iterkeys(arg))):
575+
elif not len(sl - set(keys)):
538576

539577
result = _agg(arg, _agg_1dim)
540578

@@ -555,32 +593,74 @@ def _agg(arg, func):
555593
result = _agg(arg, _agg_2dim)
556594

557595
# combine results
596+
597+
def is_any_series():
598+
# return a boolean if we have *any* nested series
599+
return any([isinstance(r, ABCSeries)
600+
for r in compat.itervalues(result)])
601+
602+
def is_any_frame():
603+
# return a boolean if we have *any* nested frame
604+
return any([isinstance(r, ABCDataFrame)
605+
for r in compat.itervalues(result)])
606+
558607
if isinstance(result, list):
559-
result = concat(result, keys=keys, axis=1)
560-
elif isinstance(list(compat.itervalues(result))[0],
561-
ABCDataFrame):
562-
result = concat([result[k] for k in keys], keys=keys, axis=1)
563-
else:
564-
from pandas import DataFrame
608+
return concat(result, keys=keys, axis=1), True
609+
610+
elif is_any_frame():
611+
# we have a dict of DataFrames
612+
# return a MI DataFrame
613+
614+
return concat([result[k] for k in keys],
615+
keys=keys, axis=1), True
616+
617+
elif isinstance(self, ABCSeries) and is_any_series():
618+
619+
# we have a dict of Series
620+
# return a MI Series
621+
try:
622+
result = concat(result)
623+
except TypeError:
624+
# we want to give a nice error here if
625+
# we have non-same sized objects, so
626+
# we don't automatically broadcast
627+
628+
raise ValueError("cannot perform both aggregation "
629+
"and transformation operations "
630+
"simultaneously")
631+
632+
return result, True
633+
634+
# fall thru
635+
from pandas import DataFrame, Series
636+
try:
565637
result = DataFrame(result)
638+
except ValueError:
639+
640+
# we have a dict of scalars
641+
result = Series(result,
642+
name=getattr(self, 'name', None))
566643

567644
return result, True
568-
elif hasattr(arg, '__iter__'):
569-
return self._aggregate_multiple_funcs(arg, _level=_level), None
645+
elif is_list_like(arg) and arg not in compat.string_types:
646+
# we require a list, but not a 'str'
647+
return self._aggregate_multiple_funcs(arg,
648+
_level=_level,
649+
_axis=_axis), None
570650
else:
571651
result = None
572652

573-
cy_func = self._is_cython_func(arg)
574-
if cy_func and not args and not kwargs:
575-
return getattr(self, cy_func)(), None
653+
f = self._is_cython_func(arg)
654+
if f and not args and not kwargs:
655+
return getattr(self, f)(), None
576656

577657
# caller can react
578658
return result, True
579659

580-
def _aggregate_multiple_funcs(self, arg, _level):
660+
def _aggregate_multiple_funcs(self, arg, _level, _axis):
581661
from pandas.tools.concat import concat
582662

583-
if self.axis != 0:
663+
if _axis != 0:
584664
raise NotImplementedError("axis other than 0 is not supported")
585665

586666
if self._selected_obj.ndim == 1:
@@ -615,10 +695,30 @@ def _aggregate_multiple_funcs(self, arg, _level):
615695
keys.append(col)
616696
except (TypeError, DataError):
617697
pass
698+
except ValueError:
699+
# cannot aggregate
700+
continue
618701
except SpecificationError:
619702
raise
620703

621-
return concat(results, keys=keys, axis=1)
704+
# if we are empty
705+
if not len(results):
706+
raise ValueError("no results")
707+
708+
try:
709+
return concat(results, keys=keys, axis=1)
710+
except TypeError:
711+
712+
# we are concatting non-NDFrame objects,
713+
# e.g. a list of scalars
714+
715+
from pandas.types.cast import is_nested_object
716+
from pandas import Series
717+
result = Series(results, index=keys, name=self.name)
718+
if is_nested_object(result):
719+
raise ValueError("cannot combine transform and "
720+
"aggregation operations")
721+
return result
622722

623723
def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
624724
""" return a new object with the replacement attributes """

0 commit comments

Comments
 (0)