Skip to content

Commit ec6361f

Browse files
committed
DEPR: deprecate relabeling dictionaries in groupby.agg
1 parent 0cfc08c commit ec6361f

File tree

8 files changed

+335
-84
lines changed

8 files changed

+335
-84
lines changed

doc/source/whatsnew/v0.20.0.txt

+73
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,79 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi
423423
df.iloc[[0, 2], df.columns.get_loc('A')]
424424

425425

426+
.. _whatsnew_0200.api_breaking.deprecate_agg_series:
427+
428+
Deprecate groupby.agg() with a dictionary when renaming
429+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
430+
431+
The ``.groupby(..).agg(..)`` syntax can accept a variety of inputs, including scalars, lists, and a dictionary of column names to scalars or lists.
432+
This provides a useful syntax for constructing multiple (potentially different) aggregations for a groupby.
433+
434+
1) We are deprecating passing a dictionary to a grouped ``Series``. This allowed one to ``rename`` the resulting aggregation, but this had a completely different
435+
meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations.
436+
2) We are deprecating passing a dict-of-dict to a grouped ``DataFrame`` in a similar manner.
437+
438+
Here's an example of 1), passing a dict to a grouped ``Series``:
439+
440+
.. ipython:: python
441+
442+
df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
443+
'B': range(5),
444+
'C':range(5)})
445+
df
446+
447+
Aggregating a DataFrame with column selection.
448+
449+
.. ipython:: python
450+
451+
df.groupby('A').agg({'B': ['sum', 'max'],
452+
'C': ['count', 'min']})
453+
454+
455+
We are deprecating the following, which is a combination of aggregation & renaming:
456+
457+
.. code-block:: ipython
458+
459+
In [6]: df.groupby('A').B.agg({'foo': 'count'})
460+
FutureWarning: using a dictionary on a Series for aggregation
461+
is deprecated and will be removed in a future version
462+
463+
Out[6]:
464+
foo
465+
A
466+
1 3
467+
2 2
468+
469+
You can accomplish the same operation, more idiomatically by:
470+
471+
.. ipython:: python
472+
473+
df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'})
474+
475+
476+
Here's an example of 2), passing a dict-of-dict to a grouped ``DataFrame``:
477+
478+
.. code-block:: ipython
479+
480+
In [23]: df.groupby('A').agg({'B': {'foo': ['sum', 'max']}, 'C': {'bar': ['count', 'min']}})
481+
FutureWarning: using a dict with renaming
482+
is deprecated and will be removed in a future version
483+
484+
Out[23]:
485+
foo bar
486+
sum max count min
487+
A
488+
1 3 2 3 0
489+
2 7 4 2 3
490+
491+
You can accomplish the same by:
492+
493+
.. ipython:: python
494+
495+
r = df.groupby('A').agg({'B': ['sum', 'max'], 'C': ['count', 'min']})
496+
r.columns = r.columns.set_levels(['foo', 'bar'], level=0)
497+
r
498+
426499
.. _whatsnew.api_breaking.io_compat:
427500

428501
Possible incompat for HDF5 formats for pandas < 0.13.0

pandas/core/base.py

+118-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Base and utility classes for pandas objects.
33
"""
4+
import warnings
45
from pandas import compat
56
from pandas.compat import builtins
67
import numpy as np
@@ -290,7 +291,9 @@ class SelectionMixin(object):
290291
}
291292

292293
@property
293-
def name(self):
294+
def _selection_name(self):
295+
""" return a name for myself; this would ideally be the 'name' property, but
296+
we cannot conflict with the Series.name property which can be set """
294297
if self._selection is None:
295298
return None # 'result'
296299
else:
@@ -405,6 +408,26 @@ def aggregate(self, func, *args, **kwargs):
405408

406409
agg = aggregate
407410

411+
def _try_aggregate_string_function(self, arg, *args, **kwargs):
    """
    Resolve the string ``arg`` to an operation and apply it.

    Lookup order:

    - an attribute of that name on ourselves; if it is callable it is
      called with ``*args, **kwargs``, otherwise its value is returned
    - a callable of that name in the numpy namespace, called with
      ourselves as the first argument
    - otherwise raise

    Parameters
    ----------
    arg : str
        name of the method / numpy function to apply
    *args, **kwargs
        passed through to the resolved callable

    Returns
    -------
    the result of the resolved operation

    Raises
    ------
    ValueError
        if ``arg`` cannot be resolved to an operation, or if arguments
        are supplied for a non-callable attribute
    """
    assert isinstance(arg, compat.string_types)

    f = getattr(self, arg, None)
    if f is not None:
        if callable(f):
            return f(*args, **kwargs)

        # a plain (non-callable) attribute: there is nothing to call,
        # so refuse arguments rather than silently dropping them
        if args or kwargs:
            raise ValueError("{} is not callable and cannot accept "
                             "arguments".format(arg))
        return f

    f = getattr(np, arg, None)
    # numpy also exposes non-callables (e.g. 'pi', 'nan'); treat those
    # as unknown rather than failing with an opaque TypeError
    if callable(f):
        return f(self, *args, **kwargs)

    raise ValueError("{} is an unknown string function".format(arg))
430+
408431
def _aggregate(self, arg, *args, **kwargs):
409432
"""
410433
provide an implementation for the aggregators
@@ -428,14 +451,19 @@ def _aggregate(self, arg, *args, **kwargs):
428451
is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
429452
is_nested_renamer = False
430453

454+
_axis = kwargs.pop('_axis', None)
455+
if _axis is None:
456+
_axis = getattr(self, 'axis', 0)
431457
_level = kwargs.pop('_level', None)
458+
432459
if isinstance(arg, compat.string_types):
433-
return getattr(self, arg)(*args, **kwargs), None
460+
return self._try_aggregate_string_function(arg, *args,
461+
**kwargs), None
434462

435463
if isinstance(arg, dict):
436464

437465
# aggregate based on the passed dict
438-
if self.axis != 0: # pragma: no cover
466+
if _axis != 0: # pragma: no cover
439467
raise ValueError('Can only pass dict with axis=0')
440468

441469
obj = self._selected_obj
@@ -505,6 +533,16 @@ def _agg(arg, func):
505533
keys = list(compat.iterkeys(arg))
506534
result = compat.OrderedDict()
507535

536+
# renaming keys
537+
if isinstance(self._selected_obj, ABCDataFrame):
538+
if len(self._selected_obj.columns.intersection(
539+
keys)) != len(keys):
540+
warnings.warn(
541+
("using a dict with renaming"
542+
"is deprecated and will be removed in a future "
543+
"version"),
544+
FutureWarning, stacklevel=3)
545+
508546
# nested renamer
509547
if is_nested_renamer:
510548
result = list(_agg(arg, _agg_1dim).values())
@@ -534,7 +572,7 @@ def _agg(arg, func):
534572
agg_how: _agg_1dim(self._selection, agg_how))
535573

536574
# we are selecting the same set as we are aggregating
537-
elif not len(sl - set(compat.iterkeys(arg))):
575+
elif not len(sl - set(keys)):
538576

539577
result = _agg(arg, _agg_1dim)
540578

@@ -555,32 +593,74 @@ def _agg(arg, func):
555593
result = _agg(arg, _agg_2dim)
556594

557595
# combine results
596+
597+
def is_any_series():
598+
# return a boolean if we have *any* nested series
599+
return any([isinstance(r, ABCSeries)
600+
for r in compat.itervalues(result)])
601+
602+
def is_any_frame():
603+
# return a boolean if we have *any* nested frames
604+
return any([isinstance(r, ABCDataFrame)
605+
for r in compat.itervalues(result)])
606+
558607
if isinstance(result, list):
559-
result = concat(result, keys=keys, axis=1)
560-
elif isinstance(list(compat.itervalues(result))[0],
561-
ABCDataFrame):
562-
result = concat([result[k] for k in keys], keys=keys, axis=1)
563-
else:
564-
from pandas import DataFrame
608+
return concat(result, keys=keys, axis=1), True
609+
610+
elif is_any_frame():
611+
# we have a dict of DataFrames
612+
# return a MI DataFrame
613+
614+
return concat([result[k] for k in keys],
615+
keys=keys, axis=1), True
616+
617+
elif isinstance(self, ABCSeries) and is_any_series():
618+
619+
# we have a dict of Series
620+
# return a MI Series
621+
try:
622+
result = concat(result)
623+
except TypeError:
624+
# we want to give a nice error here if
625+
# we have non-same sized objects, so
626+
# we don't automatically broadcast
627+
628+
raise ValueError("cannot perform both aggregation "
629+
"and transformation operations "
630+
"simultaneously")
631+
632+
return result, True
633+
634+
# fall thru
635+
from pandas import DataFrame, Series
636+
try:
565637
result = DataFrame(result)
638+
except ValueError:
639+
640+
# we have a dict of scalars
641+
result = Series(result,
642+
name=getattr(self, 'name', None))
566643

567644
return result, True
568-
elif hasattr(arg, '__iter__'):
569-
return self._aggregate_multiple_funcs(arg, _level=_level), None
645+
elif is_list_like(arg) and arg not in compat.string_types:
646+
# we require a list-like, but not a 'str'
647+
return self._aggregate_multiple_funcs(arg,
648+
_level=_level,
649+
_axis=_axis), None
570650
else:
571651
result = None
572652

573-
cy_func = self._is_cython_func(arg)
574-
if cy_func and not args and not kwargs:
575-
return getattr(self, cy_func)(), None
653+
f = self._is_cython_func(arg)
654+
if f and not args and not kwargs:
655+
return getattr(self, f)(), None
576656

577657
# caller can react
578658
return result, True
579659

580-
def _aggregate_multiple_funcs(self, arg, _level):
660+
def _aggregate_multiple_funcs(self, arg, _level, _axis):
581661
from pandas.tools.concat import concat
582662

583-
if self.axis != 0:
663+
if _axis != 0:
584664
raise NotImplementedError("axis other than 0 is not supported")
585665

586666
if self._selected_obj.ndim == 1:
@@ -615,10 +695,30 @@ def _aggregate_multiple_funcs(self, arg, _level):
615695
keys.append(col)
616696
except (TypeError, DataError):
617697
pass
698+
except ValueError:
699+
# cannot aggregate
700+
continue
618701
except SpecificationError:
619702
raise
620703

621-
return concat(results, keys=keys, axis=1)
704+
# if we are empty
705+
if not len(results):
706+
raise ValueError("no results")
707+
708+
try:
709+
return concat(results, keys=keys, axis=1)
710+
except TypeError:
711+
712+
# we are concatting non-NDFrame objects,
713+
# e.g. a list of scalars
714+
715+
from pandas.types.cast import is_nested_object
716+
from pandas import Series
717+
result = Series(results, index=keys, name=self.name)
718+
if is_nested_object(result):
719+
raise ValueError("cannot combine transform and "
720+
"aggregation operations")
721+
return result
622722

623723
def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
624724
""" return a new object with the replacement attributes """

0 commit comments

Comments
 (0)