Skip to content

Commit 99bae05

Browse files
committed
ENH: add set_index to Series
1 parent aaa69d1 commit 99bae05

File tree

5 files changed

+377
-92
lines changed

5 files changed

+377
-92
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ Other Enhancements
194194
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
195195
- :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`)
196196
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
197+
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
197198
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
198199
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
199200
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).

pandas/core/frame.py

+32-80
Original file line numberDiff line numberDiff line change
@@ -3914,43 +3914,54 @@ def shift(self, periods=1, freq=None, axis=0):
39143914
def set_index(self, keys, drop=True, append=False, inplace=False,
39153915
verify_integrity=False):
39163916
"""
3917-
Set the DataFrame index (row labels) using one or more existing
3918-
columns. By default yields a new object.
3917+
Set the DataFrame index (row labels) using one or more columns.
39193918
39203919
Parameters
39213920
----------
39223921
keys : column label or list of column labels / arrays
3922+
Either a column label, Series, Index, MultiIndex, list,
3923+
np.ndarray or a list containing only column labels, Series, Index,
3924+
MultiIndex, list, np.ndarray.
39233925
drop : boolean, default True
3924-
Delete columns to be used as the new index
3926+
Delete columns to be used as the new index.
39253927
append : boolean, default False
3926-
Whether to append columns to existing index
3928+
Whether to append columns to existing index.
39273929
inplace : boolean, default False
3928-
Modify the DataFrame in place (do not create a new object)
3930+
Modify the DataFrame in place (do not create a new object).
39293931
verify_integrity : boolean, default False
39303932
Check the new index for duplicates. Otherwise defer the check until
39313933
necessary. Setting to False will improve the performance of this
3932-
method
3934+
method.
3935+
3936+
Returns
3937+
-------
3938+
reindexed : DataFrame if inplace is False, else None
3939+
3940+
See Also
3941+
--------
3942+
Series.set_index: Corresponding method for Series
39333943
39343944
Examples
39353945
--------
39363946
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
39373947
... 'year': [2012, 2014, 2013, 2014],
3938-
... 'sale':[55, 40, 84, 31]})
3939-
month sale year
3940-
0 1 55 2012
3941-
1 4 40 2014
3942-
2 7 84 2013
3943-
3 10 31 2014
3948+
... 'sale': [55, 40, 84, 31]})
3949+
>>> df
3950+
month year sale
3951+
0 1 2012 55
3952+
1 4 2014 40
3953+
2 7 2013 84
3954+
3 10 2014 31
39443955
39453956
Set the index to become the 'month' column:
39463957
39473958
>>> df.set_index('month')
3948-
sale year
3959+
year sale
39493960
month
3950-
1 55 2012
3951-
4 40 2014
3952-
7 84 2013
3953-
10 31 2014
3961+
1 2012 55
3962+
4 2014 40
3963+
7 2013 84
3964+
10 2014 31
39543965
39553966
Create a multi-index using columns 'year' and 'month':
39563967
@@ -3971,73 +3982,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39713982
2 2014 4 40
39723983
3 2013 7 84
39733984
4 2014 10 31
3974-
3975-
Returns
3976-
-------
3977-
dataframe : DataFrame
39783985
"""
3979-
inplace = validate_bool_kwarg(inplace, 'inplace')
39803986
if not isinstance(keys, list):
39813987
keys = [keys]
39823988

3983-
if inplace:
3984-
frame = self
3985-
else:
3986-
frame = self.copy()
3987-
3988-
arrays = []
3989-
names = []
3990-
if append:
3991-
names = [x for x in self.index.names]
3992-
if isinstance(self.index, MultiIndex):
3993-
for i in range(self.index.nlevels):
3994-
arrays.append(self.index._get_level_values(i))
3995-
else:
3996-
arrays.append(self.index)
3997-
3998-
to_remove = []
3999-
for col in keys:
4000-
if isinstance(col, MultiIndex):
4001-
# append all but the last column so we don't have to modify
4002-
# the end of this loop
4003-
for n in range(col.nlevels - 1):
4004-
arrays.append(col._get_level_values(n))
4005-
4006-
level = col._get_level_values(col.nlevels - 1)
4007-
names.extend(col.names)
4008-
elif isinstance(col, Series):
4009-
level = col._values
4010-
names.append(col.name)
4011-
elif isinstance(col, Index):
4012-
level = col
4013-
names.append(col.name)
4014-
elif isinstance(col, (list, np.ndarray, Index)):
4015-
level = col
4016-
names.append(None)
4017-
else:
4018-
level = frame[col]._values
4019-
names.append(col)
4020-
if drop:
4021-
to_remove.append(col)
4022-
arrays.append(level)
4023-
4024-
index = ensure_index_from_sequences(arrays, names)
4025-
4026-
if verify_integrity and not index.is_unique:
4027-
duplicates = index[index.duplicated()].unique()
4028-
raise ValueError('Index has duplicate keys: {dup}'.format(
4029-
dup=duplicates))
4030-
4031-
for c in to_remove:
4032-
del frame[c]
4033-
4034-
# clear up memory usage
4035-
index._cleanup()
4036-
4037-
frame.index = index
4038-
4039-
if not inplace:
4040-
return frame
3989+
vi = verify_integrity
3990+
return super(DataFrame, self).set_index(keys=keys, drop=drop,
3991+
append=append, inplace=inplace,
3992+
verify_integrity=vi)
40413993

40423994
def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
40433995
col_fill=''):

pandas/core/generic.py

+137-3
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3333
from pandas.core.dtypes.inference import is_hashable
3434
from pandas.core.dtypes.missing import isna, notna
35-
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
35+
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
36+
ABCSeries, ABCDataFrame)
3637

3738
from pandas.core.base import PandasObject, SelectionMixin
38-
from pandas.core.index import (Index, MultiIndex, ensure_index,
39-
InvalidIndexError, RangeIndex)
39+
from pandas.core.index import (Index, MultiIndex,
40+
InvalidIndexError, RangeIndex,
41+
ensure_index, ensure_index_from_sequences)
4042
import pandas.core.indexing as indexing
4143
from pandas.core.indexes.datetimes import DatetimeIndex
4244
from pandas.core.indexes.period import PeriodIndex, Period
@@ -643,6 +645,138 @@ def _set_axis(self, axis, labels):
643645
self._data.set_axis(axis, labels)
644646
self._clear_item_cache()
645647

648+
def set_index(self, keys, drop=True, append=False, inplace=False,
              verify_integrity=False):
    """
    Set the index (row labels) using one or more given arrays (or labels).

    Parameters
    ----------
    keys : column label or list of column labels / arrays
        Either a Series, Index, MultiIndex, list, np.ndarray or a list
        containing only Series, Index, MultiIndex, list, np.ndarray.

        For DataFrame, additionally column labels may be used.
    drop : boolean, default True
        Delete columns to be used as the new index (only for DataFrame).
    append : boolean, default False
        Whether to append columns to existing index.
    inplace : boolean, default False
        Modify the Series/DataFrame in place (do not create a new object).
    verify_integrity : boolean, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method.

    Returns
    -------
    reindexed : Series/DataFrame if inplace is False, else None

    Raises
    ------
    ValueError
        If ``verify_integrity`` is True and the resulting index contains
        duplicate entries.

    See Also
    --------
    DataFrame.set_index: method adapted for DataFrame
    Series.set_index: method adapted for Series

    Examples
    --------
    >>> df = pd.DataFrame({'month': [1, 4, 7, 10],
    ...                    'year': [2012, 2014, 2013, 2014],
    ...                    'sale': [55, 40, 84, 31]})
    >>> df
       month  year  sale
    0      1  2012    55
    1      4  2014    40
    2      7  2013    84
    3     10  2014    31

    Set the index to become the 'month' column:

    >>> df.set_index('month')
           year  sale
    month
    1      2012    55
    4      2014    40
    7      2013    84
    10     2014    31

    Create a multi-index using columns 'year' and 'month':

    >>> df.set_index(['year', 'month'])
                sale
    year  month
    2012  1     55
    2014  4     40
    2013  7     84
    2014  10    31

    Create a multi-index using a set of values and a column:

    >>> df.set_index([[1, 2, 3, 4], 'year'])
             month  sale
       year
    1  2012  1      55
    2  2014  4      40
    3  2013  7      84
    4  2014  10     31
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')
    obj = self if inplace else self.copy()

    arrays = []
    names = []
    if append:
        # keep every level of the existing index in front of the new ones
        names = list(self.index.names)
        if isinstance(self.index, ABCMultiIndex):
            arrays.extend(self.index._get_level_values(i)
                          for i in range(self.index.nlevels))
        else:
            arrays.append(self.index)

    # NOTE: ``keys`` is iterated element by element, so a scalar label must
    # already have been wrapped in a list by the caller (DataFrame.set_index
    # does so; presumably Series.set_index as well -- confirm).
    to_remove = []
    for col in keys:
        if isinstance(col, ABCMultiIndex):
            # a MultiIndex contributes one array (and name) per level
            arrays.extend(col._get_level_values(n)
                          for n in range(col.nlevels))
            names.extend(col.names)
        elif isinstance(col, ABCIndexClass):
            # Index but not MultiIndex (treated above)
            arrays.append(col)
            names.append(col.name)
        elif isinstance(col, ABCSeries):
            arrays.append(col._values)
            names.append(col.name)
        elif isinstance(col, (list, np.ndarray)):
            arrays.append(col)
            names.append(None)
        # from here, col can only be a column label (and obj a DataFrame);
        # see checks in Series.set_index and DataFrame.set_index
        else:
            arrays.append(obj[col]._values)
            names.append(col)
            # Record each label only once: a label repeated in ``keys``
            # would otherwise be deleted twice below and raise KeyError.
            if drop and col not in to_remove:
                to_remove.append(col)

    index = ensure_index_from_sequences(arrays, names)

    if verify_integrity and not index.is_unique:
        duplicates = list(index[index.duplicated()])
        raise ValueError('Index has duplicate keys: {dup}'.format(
            dup=duplicates))

    for c in to_remove:
        del obj[c]

    # clear up memory usage
    index._cleanup()

    obj.index = index

    if not inplace:
        return obj
779+
646780
def transpose(self, *args, **kwargs):
647781
"""
648782
Permute the dimensions of the %(klass)s

0 commit comments

Comments
 (0)