From bf0eee049b6d64fc18875f04eb93da295dd3e177 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 08:51:31 +0100
Subject: [PATCH 01/14] DEPR/API: disallow lists within list for set_index

---
 pandas/core/frame.py                  | 56 +++++++++++++++++----------
 pandas/tests/frame/test_alter_axes.py | 48 ++++++++---------------
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7bbbdd70e062e..de52fd95f7b56 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4041,12 +4041,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         Set the DataFrame index using existing columns.
 
         Set the DataFrame index (row labels) using one or more existing
-        columns. The index can replace the existing index or expand on it.
+        columns or arrays (of the correct length). The index can replace the
+        existing index or expand on it.
 
         Parameters
         ----------
-        keys : label or list of label
-            Name or names of the columns that will be used as the index.
+        keys : label or array-like or list-like of labels/arrays
+            This parameter can be either a single column key, a single array of
+            the same length as the calling DataFrame, or a list-like containing
+            an arbitrary combination of column keys and arrays.
         drop : bool, default True
             Delete columns to be used as the new index.
         append : bool, default False
@@ -4091,7 +4094,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         7      2013    84
         10     2014    31
 
-        Create a multi-index using columns 'year' and 'month':
+        Create a MultiIndex using columns 'year' and 'month':
 
         >>> df.set_index(['year', 'month'])
                     sale
@@ -4101,35 +4104,52 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         2013  7     84
         2014  10    31
 
-        Create a multi-index using a set of values and a column:
+        Create a MultiIndex using a set of values and a column:
 
-        >>> df.set_index([[1, 2, 3, 4], 'year'])
+        >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year'])
                  month  sale
            year
         1  2012  1      55
         2  2014  4      40
         3  2013  7      84
         4  2014  10     31
+
+        Create a MultiIndex using a set of values and a column:
+
+        >>> s = pd.Series([1, 2, 3, 4])
+        >>> df.set_index([s, s**2])
+              month  year  sale
+        1 1       1  2012    55
+        2 4       4  2014    40
+        3 9       7  2013    84
+        4 16     10  2014    31
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-        if not isinstance(keys, list):
+
+        err_msg = ('The parameter "keys" may be a column key, one-dimensional '
+                   'array, or a list-like containing only valid column keys '
+                   'and one-dimensional arrays')
+
+        if (is_scalar(keys) or isinstance(keys, tuple)
+                or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
+            # make sure we have a container of keys/arrays we can iterate over
+            # tuples can appear as valid column keys!
             keys = [keys]
+        elif not isinstance(keys, list):
+            raise ValueError(err_msg)
 
         missing = []
         for col in keys:
             if (is_scalar(col) or isinstance(col, tuple)) and col in self:
-                # tuples can be both column keys or list-likes
-                # if they are valid column keys, everything is fine
+                # if col is a valid column key, everything is fine
                 continue
             elif is_scalar(col) and col not in self:
-                # tuples that are not column keys are considered list-like,
-                # not considered missing
+                # tuples that are not keys will be are excluded here;
+                # will be considered list-like, not missing
                 missing.append(col)
-            elif (not is_list_like(col, allow_sets=False)
+            elif (not isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray))
                   or getattr(col, 'ndim', 1) > 1):
-                raise TypeError('The parameter "keys" may only contain a '
-                                'combination of valid column keys and '
-                                'one-dimensional list-likes')
+                raise ValueError(err_msg)
 
         if missing:
             raise KeyError('{}'.format(missing))
@@ -4162,12 +4182,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             elif isinstance(col, (list, np.ndarray)):
                 arrays.append(col)
                 names.append(None)
-            elif (is_list_like(col)
-                  and not (isinstance(col, tuple) and col in self)):
-                # all other list-likes (but avoid valid column keys)
-                col = list(col)  # ensure iterator do not get read twice etc.
-                arrays.append(col)
-                names.append(None)
             # from here, col can only be a column label
             else:
                 arrays.append(frame[col]._values)
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index b63151dfb459e..46316c429f554 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -118,7 +118,6 @@ def test_set_index_after_mutation(self):
     # Add list-of-list constructor because list is ambiguous -> lambda
     # also test index name if append=True (name is duplicate here for B)
     @pytest.mark.parametrize('box', [Series, Index, np.array,
-                                     list, tuple, iter, lambda x: [list(x)],
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name', [(True, None),
                              (True, 'B'), (True, 'test'), (False, None)])
@@ -129,29 +128,22 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
         df.index.name = index_name
 
         key = box(df['B'])
-        if box == list:
-            # list of strings gets interpreted as list of keys
-            msg = "['one', 'two', 'three', 'one', 'two']"
-            with pytest.raises(KeyError, match=msg):
-                df.set_index(key, drop=drop, append=append)
-        else:
-            # np.array/tuple/iter/list-of-list "forget" the name of B
-            name_mi = getattr(key, 'names', None)
-            name = [getattr(key, 'name', None)] if name_mi is None else name_mi
+        # np.array "forgets" the name of B
+        name_mi = getattr(key, 'names', None)
+        name = [getattr(key, 'name', None)] if name_mi is None else name_mi
 
-            result = df.set_index(key, drop=drop, append=append)
+        result = df.set_index(key, drop=drop, append=append)
 
-            # only valid column keys are dropped
-            # since B is always passed as array above, nothing is dropped
-            expected = df.set_index(['B'], drop=False, append=append)
-            expected.index.names = [index_name] + name if append else name
+        # only valid column keys are dropped
+        # since B is always passed as array above, nothing is dropped
+        expected = df.set_index(['B'], drop=False, append=append)
+        expected.index.names = [index_name] + name if append else name
 
-            tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected)
 
     # MultiIndex constructor does not work directly on Series -> lambda
     # also test index name if append=True (name is duplicate here for A & B)
     @pytest.mark.parametrize('box', [Series, Index, np.array,
-                                     list, tuple, iter,
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name',
                              [(True, None), (True, 'A'), (True, 'B'),
@@ -163,8 +155,8 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
         df.index.name = index_name
 
         keys = ['A', box(df['B'])]
-        # np.array/list/tuple/iter "forget" the name of B
-        names = ['A', None if box in [np.array, list, tuple, iter] else 'B']
+        # np.array "forgets" the name of B
+        names = ['A', None if box in [np.array] else 'B']
 
         result = df.set_index(keys, drop=drop, append=append)
 
@@ -180,11 +172,9 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
     # We also emulate a "constructor" for the label -> lambda
     # also test index name if append=True (name is duplicate here for A)
     @pytest.mark.parametrize('box2', [Series, Index, np.array,
-                                      list, tuple, iter,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
     @pytest.mark.parametrize('box1', [Series, Index, np.array,
-                                      list, tuple, iter,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
     @pytest.mark.parametrize('append, index_name', [(True, None),
@@ -198,19 +188,15 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         keys = [box1(df['A']), box2(df['A'])]
         result = df.set_index(keys, drop=drop, append=append)
 
-        # if either box was iter, the content has been consumed; re-read it
-        keys = [box1(df['A']), box2(df['A'])]
-
         # need to adapt first drop for case that both keys are 'A' --
         # cannot drop the same column twice;
         # use "is" because == would give ambiguous Boolean error for containers
         first_drop = False if (keys[0] is 'A' and keys[1] is 'A') else drop
 
         # to test against already-tested behaviour, we add sequentially,
-        # hence second append always True; must wrap keys in list, otherwise
-        # box = list would be illegal
-        expected = df.set_index([keys[0]], drop=first_drop, append=append)
-        expected = expected.set_index([keys[1]], drop=drop, append=True)
+        # hence second append always True
+        expected = df.set_index(keys[0], drop=first_drop, append=append)
+        expected = expected.set_index(keys[1], drop=drop, append=True)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize('append', [True, False])
@@ -249,13 +235,13 @@ def test_set_index_raise(self, frame_of_index_cols, drop, append):
         with pytest.raises(KeyError, match='X'):
             df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
 
-        msg = 'The parameter "keys" may only contain a combination of.*'
+        msg = 'The parameter "keys" may be a column key, .*'
         # forbidden type, e.g. set
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(ValueError, match=msg):
             df.set_index(set(df['A']), drop=drop, append=append)
 
         # forbidden type in list, e.g. set
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(ValueError, match=msg):
             df.set_index(['A', df['A'], set(df['A'])],
                          drop=drop, append=append)
 

From ed0de1f3756e2f90f8ba0d32c1223375bd9fe9a3 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 09:39:58 +0100
Subject: [PATCH 02/14] Add deprecation and whatsnew

---
 doc/source/whatsnew/v0.24.0.rst       |  3 ++-
 pandas/core/frame.py                  | 11 ++++++--
 pandas/tests/frame/test_alter_axes.py | 36 ++++++++++++++++++++-------
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index aee3d78243d2e..1bd926a4d4406 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1251,7 +1251,7 @@ Other API Changes
 - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
 - :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
-- :meth:`DataFrame.set_index` now allows all one-dimensional list-likes, raises a ``TypeError`` for incorrect types,
+- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, and raises a ``ValueError`` for incorrect types,
   has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
 - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
@@ -1309,6 +1309,7 @@ Deprecations
 - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
 - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
 - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`)
+- :meth:`DataFrame.set_index` has deprecated using lists of values *within* lists. It remains possible to pass array-likes, both directly and within a list.
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index de52fd95f7b56..42dd7874c6348 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4139,20 +4139,27 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             raise ValueError(err_msg)
 
         missing = []
+        depr_warn = False
         for col in keys:
             if (is_scalar(col) or isinstance(col, tuple)) and col in self:
                 # if col is a valid column key, everything is fine
                 continue
             elif is_scalar(col) and col not in self:
-                # tuples that are not keys will be are excluded here;
-                # will be considered list-like, not missing
+                # tuples that are not keys are not considered missing,
+                # but as an illegal list-like
                 missing.append(col)
+            elif isinstance(col, list):
+                depr_warn = True
             elif (not isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray))
                   or getattr(col, 'ndim', 1) > 1):
                 raise ValueError(err_msg)
 
         if missing:
             raise KeyError('{}'.format(missing))
+        if depr_warn:
+            msg = ('passing lists within a list to the parameter "keys" is '
+                   'deprecated and will be removed in a future version.')
+            warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if inplace:
             frame = self
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 46316c429f554..2670da5ca028e 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -143,7 +143,7 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
 
     # MultiIndex constructor does not work directly on Series -> lambda
     # also test index name if append=True (name is duplicate here for A & B)
-    @pytest.mark.parametrize('box', [Series, Index, np.array,
+    @pytest.mark.parametrize('box', [Series, Index, np.array, list,
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name',
                              [(True, None), (True, 'A'), (True, 'B'),
@@ -156,9 +156,13 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
 
         keys = ['A', box(df['B'])]
         # np.array "forgets" the name of B
-        names = ['A', None if box in [np.array] else 'B']
+        names = ['A', None if box in [list, np.array] else 'B']
 
-        result = df.set_index(keys, drop=drop, append=append)
+        if box == list:
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.set_index(keys, drop=drop, append=append)
+        else:
+            result = df.set_index(keys, drop=drop, append=append)
 
         # only valid column keys are dropped
         # since B is always passed as array above, only A is dropped, if at all
@@ -171,10 +175,10 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
     # MultiIndex constructor does not work directly on Series -> lambda
     # We also emulate a "constructor" for the label -> lambda
     # also test index name if append=True (name is duplicate here for A)
-    @pytest.mark.parametrize('box2', [Series, Index, np.array,
+    @pytest.mark.parametrize('box2', [Series, Index, np.array, list,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
-    @pytest.mark.parametrize('box1', [Series, Index, np.array,
+    @pytest.mark.parametrize('box1', [Series, Index, np.array, list,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
     @pytest.mark.parametrize('append, index_name', [(True, None),
@@ -186,7 +190,12 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         df.index.name = index_name
 
         keys = [box1(df['A']), box2(df['A'])]
-        result = df.set_index(keys, drop=drop, append=append)
+
+        if box1 == list or box2 == list:
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.set_index(keys, drop=drop, append=append)
+        else:
+            result = df.set_index(keys, drop=drop, append=append)
 
         # need to adapt first drop for case that both keys are 'A' --
         # cannot drop the same column twice;
@@ -194,9 +203,18 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         first_drop = False if (keys[0] is 'A' and keys[1] is 'A') else drop
 
         # to test against already-tested behaviour, we add sequentially,
-        # hence second append always True
-        expected = df.set_index(keys[0], drop=first_drop, append=append)
-        expected = expected.set_index(keys[1], drop=drop, append=True)
+        # hence second append always True; must wrap keys in list, otherwise
+        # box = list would be interpreted as keys
+        if box1 == list or box2 == list:
+            with tm.assert_produces_warning(FutureWarning):
+                expected = df.set_index([keys[0]], drop=first_drop,
+                                        append=append)
+                expected = expected.set_index([keys[1]], drop=drop,
+                                              append=True)
+        else:
+            expected = df.set_index([keys[0]], drop=first_drop, append=append)
+            expected = expected.set_index([keys[1]], drop=drop, append=True)
+
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize('append', [True, False])

From dc274e3926747f4eabd9ccdada39803ca1abbe72 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 09:44:50 +0100
Subject: [PATCH 03/14] restore test for list-of-scalars interpreted as keys

---
 pandas/tests/frame/test_alter_axes.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 2670da5ca028e..f3db8d09a1278 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -117,7 +117,7 @@ def test_set_index_after_mutation(self):
     # MultiIndex constructor does not work directly on Series -> lambda
     # Add list-of-list constructor because list is ambiguous -> lambda
     # also test index name if append=True (name is duplicate here for B)
-    @pytest.mark.parametrize('box', [Series, Index, np.array,
+    @pytest.mark.parametrize('box', [Series, Index, np.array, list,
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name', [(True, None),
                              (True, 'B'), (True, 'test'), (False, None)])
@@ -128,18 +128,24 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
         df.index.name = index_name
 
         key = box(df['B'])
-        # np.array "forgets" the name of B
-        name_mi = getattr(key, 'names', None)
-        name = [getattr(key, 'name', None)] if name_mi is None else name_mi
+        if box == list:
+            # list of strings gets interpreted as list of keys
+            msg = "['one', 'two', 'three', 'one', 'two']"
+            with pytest.raises(KeyError, match=msg):
+                df.set_index(key, drop=drop, append=append)
+        else:
+            # np.array "forgets" the name of B
+            name_mi = getattr(key, 'names', None)
+            name = [getattr(key, 'name', None)] if name_mi is None else name_mi
 
-        result = df.set_index(key, drop=drop, append=append)
+            result = df.set_index(key, drop=drop, append=append)
 
-        # only valid column keys are dropped
-        # since B is always passed as array above, nothing is dropped
-        expected = df.set_index(['B'], drop=False, append=append)
-        expected.index.names = [index_name] + name if append else name
+            # only valid column keys are dropped
+            # since B is always passed as array above, nothing is dropped
+            expected = df.set_index(['B'], drop=False, append=append)
+            expected.index.names = [index_name] + name if append else name
 
-        tm.assert_frame_equal(result, expected)
+            tm.assert_frame_equal(result, expected)
 
     # MultiIndex constructor does not work directly on Series -> lambda
     # also test index name if append=True (name is duplicate here for A & B)

From 623fc9ae6d9a7136851035d1c77ff0974f7db888 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 13:26:27 +0100
Subject: [PATCH 04/14] Fix whatsnew wording and docstring example captions

---
 doc/source/whatsnew/v0.24.0.rst | 4 ++--
 pandas/core/frame.py            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 1bd926a4d4406..98f299ca66e6a 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1251,8 +1251,8 @@ Other API Changes
 - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
 - :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
-- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, and raises a ``ValueError`` for incorrect types,
-  has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
+- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types,
+  and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
 - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
 - :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 42dd7874c6348..6db2755d1c7c2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4104,7 +4104,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         2013  7     84
         2014  10    31
 
-        Create a MultiIndex using a set of values and a column:
+        Create a MultiIndex using an Index and a column:
 
         >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year'])
                  month  sale
@@ -4114,7 +4114,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         3  2013  7      84
         4  2014  10     31
 
-        Create a MultiIndex using a set of values and a column:
+        Create a MultiIndex using two Series:
 
         >>> s = pd.Series([1, 2, 3, 4])
         >>> df.set_index([s, s**2])

From 5f6e3033f9ac7b419e0f452a71023f12ff5281f3 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 20:05:56 +0100
Subject: [PATCH 05/14] Improve docstring; small fixes

---
 pandas/core/frame.py                  | 13 ++++++++-----
 pandas/tests/frame/test_alter_axes.py |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6db2755d1c7c2..a2539a6760772 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4048,8 +4048,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         ----------
         keys : label or array-like or list-like of labels/arrays
             This parameter can be either a single column key, a single array of
-            the same length as the calling DataFrame, or a list-like containing
-            an arbitrary combination of column keys and arrays.
+            the same length as the calling DataFrame, or a list containing an
+            arbitrary combination of column keys and arrays. Here, "array"
+            encompasses :class:`Series`, :class:`Index` and ``np.ndarray``.
+            Lists (in the sense of a sequence of values, not column labels)
+            have been deprecated, and will be removed in a future version.
         drop : bool, default True
             Delete columns to be used as the new index.
         append : bool, default False
@@ -4127,8 +4130,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         inplace = validate_bool_kwarg(inplace, 'inplace')
 
         err_msg = ('The parameter "keys" may be a column key, one-dimensional '
-                   'array, or a list-like containing only valid column keys '
-                   'and one-dimensional arrays')
+                   'array, or a list containing only valid column keys and '
+                   'one-dimensional arrays.')
 
         if (is_scalar(keys) or isinstance(keys, tuple)
                 or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
@@ -4146,7 +4149,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 continue
             elif is_scalar(col) and col not in self:
                 # tuples that are not keys are not considered missing,
-                # but as an illegal list-like
+                # but as an illegal list-like (see below)
                 missing.append(col)
             elif isinstance(col, list):
                 depr_warn = True
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index f3db8d09a1278..6d05b7f7413d2 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -161,8 +161,8 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
         df.index.name = index_name
 
         keys = ['A', box(df['B'])]
-        # np.array "forgets" the name of B
-        names = ['A', None if box in [list, np.array] else 'B']
+        # np.array/list "forget" the name of B
+        names = ['A', None if box in [np.array, list] else 'B']
 
         if box == list:
             with tm.assert_produces_warning(FutureWarning):

From 813b4fcbcaf01659b77d3f905c5bc614e5f2e00e Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 10 Jan 2019 20:08:47 +0100
Subject: [PATCH 06/14] Remove last mention of "list-like"

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a2539a6760772..49404c2036e8b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4046,7 +4046,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
 
         Parameters
         ----------
-        keys : label or array-like or list-like of labels/arrays
+        keys : label or array-like or list of labels/arrays
             This parameter can be either a single column key, a single array of
             the same length as the calling DataFrame, or a list containing an
             arbitrary combination of column keys and arrays. Here, "array"

From 4c130ee01fc82c7272a49b5cbd21786492fdb397 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 11 Jan 2019 00:28:32 +0100
Subject: [PATCH 07/14] rephrase "illegal"

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 49404c2036e8b..70e755608c386 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4149,7 +4149,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 continue
             elif is_scalar(col) and col not in self:
                 # tuples that are not keys are not considered missing,
-                # but as an illegal list-like (see below)
+                # but illegal (see below)
                 missing.append(col)
             elif isinstance(col, list):
                 depr_warn = True

From e1d999b8f15adc355257dd09382ec88a10df8b06 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 14 Jan 2019 17:31:16 +0100
Subject: [PATCH 08/14] Improve warning message (review TomAugspurger)

---
 pandas/core/frame.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 70e755608c386..a8204aab9db2b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4160,8 +4160,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         if missing:
             raise KeyError('{}'.format(missing))
         if depr_warn:
-            msg = ('passing lists within a list to the parameter "keys" is '
-                   'deprecated and will be removed in a future version.')
+            msg = ('Passing lists within a list to the parameter "keys" is '
+                   'deprecated and will be removed in a future version. To '
+                   'silence this warning, wrap the lists in a Series / Index '
+                   'or np.ndarray. E.g. df.set_index(["A", [1, 2, 3]]) should '
+                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])).')
             warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if inplace:

From 726ef1c427ce39cd4dcca7275ed44658c1340c75 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 14 Jan 2019 17:37:55 +0100
Subject: [PATCH 09/14] Fix missing bracket in set_index deprecation message

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a8204aab9db2b..50d798172e388 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4164,7 +4164,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                    'deprecated and will be removed in a future version. To '
                    'silence this warning, wrap the lists in a Series / Index '
                    'or np.ndarray. E.g. df.set_index(["A", [1, 2, 3]]) should '
-                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])).')
+                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])]).')
             warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if inplace:

From b0b326fae4cce6424ab629f31b4857cb2300b550 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 16 Jan 2019 20:19:20 +0100
Subject: [PATCH 10/14] Tuples always considered keys; KeyError, not ValueError
 if missing

---
 pandas/core/frame.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a4dc821a65789..b9d185ace388f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4145,13 +4145,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         missing = []
         depr_warn = False
         for col in keys:
-            if (is_scalar(col) or isinstance(col, tuple)) and col in self:
+            if (is_scalar(col) or isinstance(col, tuple)):
                 # if col is a valid column key, everything is fine
-                continue
-            elif is_scalar(col) and col not in self:
-                # tuples that are not keys are not considered missing,
-                # but illegal (see below)
-                missing.append(col)
+                # tuples are always considered keys, never as list-likes
+                if col not in self:
+                    missing.append(col)
             elif isinstance(col, list):
                 depr_warn = True
             elif (not isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray))

From 02148015563e99d396712b03a5b834a0c99525b3 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 24 Feb 2019 11:25:37 +0100
Subject: [PATCH 11/14] Actually commit fix for conflict, duh

---
 pandas/core/frame.py                  | 42 ++++++++-------------------
 pandas/tests/frame/test_alter_axes.py |  9 ------
 2 files changed, 12 insertions(+), 39 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8aedfb97d75c2..79233addc77e2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4025,14 +4025,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             This parameter can be either a single column key, a single array of
             the same length as the calling DataFrame, or a list containing an
             arbitrary combination of column keys and arrays. Here, "array"
-<<<<<<< HEAD
-            encompasses :class:`Series`, :class:`Index` and ``np.ndarray``.
-            Lists (in the sense of a sequence of values, not column labels)
-            have been deprecated, and will be removed in a future version.
-=======
             encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and
             instances of :class:`abc.Iterator`.
->>>>>>> upstream/master
+            Lists (in the sense of a sequence of values, not column labels)
+            have been deprecated, and will be removed in a future version.
         drop : bool, default True
             Delete columns to be used as the new index.
         append : bool, default False
@@ -4118,34 +4114,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         missing = []
         depr_warn = False
         for col in keys:
-<<<<<<< HEAD
-            if (is_scalar(col) or isinstance(col, tuple)):
-                # if col is a valid column key, everything is fine
-                # tuples are always considered keys, never as list-likes
-                if col not in self:
-                    missing.append(col)
-            elif isinstance(col, list):
-                depr_warn = True
-            elif (not isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray))
-                  or getattr(col, 'ndim', 1) > 1):
-                raise ValueError(err_msg)
-
-        if missing:
-            raise KeyError('{}'.format(missing))
-        if depr_warn:
-            msg = ('Passing lists within a list to the parameter "keys" is '
-                   'deprecated and will be removed in a future version. To '
-                   'silence this warning, wrap the lists in a Series / Index '
-                   'or np.ndarray. E.g. df.set_index(["A", [1, 2, 3]]) should '
-                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])]).')
-            warnings.warn(msg, FutureWarning, stacklevel=2)
-=======
             if isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray,
-                                list, Iterator)):
+                                Iterator)):
                 # arrays are fine as long as they are one-dimensional
                 # iterators get converted to list below
                 if getattr(col, 'ndim', 1) != 1:
                     raise ValueError(err_msg)
+            elif isinstance(col, list):
+                depr_warn = True
             else:
                 # everything else gets tried as a key; see GH 24969
                 try:
@@ -4159,7 +4135,13 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
 
         if missing:
             raise KeyError('None of {} are in the columns'.format(missing))
->>>>>>> upstream/master
+        if depr_warn:
+            msg = ('Passing lists within a list to the parameter "keys" is '
+                   'deprecated and will be removed in a future version. To '
+                   'silence this warning, wrap the lists in a Series / Index '
+                   'or np.ndarray. E.g. df.set_index(["A", [1, 2, 3]]) should '
+                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])]).')
+            warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if inplace:
             frame = self
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 51f83b870c0d8..9ea2d4258bc9c 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -279,21 +279,12 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box,
         df = frame_of_index_cols
 
         msg = 'The parameter "keys" may be a column key, .*'
-<<<<<<< HEAD
-        # forbidden type, e.g. set/iter
-        with pytest.raises(ValueError, match=msg):
-            df.set_index(box(df['A']), drop=drop, append=append)
-
-        # forbidden type in list, e.g. set/iter
-        with pytest.raises(ValueError, match=msg):
-=======
         # forbidden type, e.g. set
         with pytest.raises(TypeError, match=msg):
             df.set_index(box(df['A']), drop=drop, append=append)
 
         # forbidden type in list, e.g. set
         with pytest.raises(TypeError, match=msg):
->>>>>>> upstream/master
             df.set_index(['A', df['A'], box(df['A'])],
                          drop=drop, append=append)
 

From 61c511d7a5aa92815a9159ba1423bfd0a4dc61a3 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 24 Feb 2019 11:25:48 +0100
Subject: [PATCH 12/14] Move whatsnew to 0.25

---
 doc/source/whatsnew/v0.24.0.rst | 1 -
 doc/source/whatsnew/v0.25.0.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 0592e44ca9893..a49ea2cf493a6 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1323,7 +1323,6 @@ Deprecations
 - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
 - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
 - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`)
-- :meth:`DataFrame.set_index` has deprecated using lists of values *within* lists. It remains possible to pass array-likes, both directly and within a list.
 - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version.  Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`)
 - ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`).
 - Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`).
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 170e7f14da397..4141d7f67b35a 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -79,6 +79,7 @@ Deprecations
 ~~~~~~~~~~~~
 
 - Deprecated the `M (months)` and `Y (year)` `units` parameter of :func: `pandas.to_timedelta`, :func: `pandas.Timedelta` and :func: `pandas.TimedeltaIndex` (:issue:`16344`)
+- :meth:`DataFrame.set_index` has deprecated using lists of values *within* lists. It remains possible to pass array-likes, both directly and within a list.
 
 .. _whatsnew_0250.prior_deprecations:
 

From 5fa544c5858725b792332a958db2f518f8fa8d12 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 1 Mar 2019 19:18:23 +0100
Subject: [PATCH 13/14] Add deprecation-section (review jreback)

---
 doc/source/whatsnew/v0.25.0.rst | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 0ee3f0809f212..c0e0d7908e7e6 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -93,8 +93,34 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+**Lists as arrays in DataFrame.set_index**
+
+Currently, :meth:`DataFrame.set_index` accepts lists as meaning two different things - as a list of labels, and as an array-like collection of values.
+This ambiguity decides in favor of the list of labels, but nested lists are interpreted as arrays:
+
+.. ipython:: ipython
+    :okwarning:
+
+    df = pd.DataFrame(np.reshape(np.arange(12), (3, 4)), columns=['a', 'b', 'c', 'd'])
+    df.set_index(['a', 'b', 'c'])
+    df.set_index([['a', 'b', 'c']])
+
+The latter case has now been deprecated and will be removed in a future version. As a replacement,
+it is suggested to wrap the list in a :class:`Series`, :class:`Index`, ``np.array`` or an iterator.
+
+.. ipython:: ipython
+
+    df.set_index(pd.Series(['a', 'b', 'c']))
+
+It remains possible to use a list to collect several column keys or arrays, each of which becomes one level of a :class:`MultiIndex`.
+
+.. ipython:: python
+
+    df.set_index(['a', pd.Series(['a', 'b', 'c'])])
+
+**Other deprecations**
+
 - Deprecated the `M (months)` and `Y (year)` `units` parameter of :func: `pandas.to_timedelta`, :func: `pandas.Timedelta` and :func: `pandas.TimedeltaIndex` (:issue:`16344`)
-- :meth:`DataFrame.set_index` has deprecated using lists of values *within* lists. It remains possible to pass array-likes, both directly and within a list.
 
 .. _whatsnew_0250.prior_deprecations:
 

From 0c65876b7818329d7cd602ab95ab87648cdf0122 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 3 Mar 2019 21:39:55 +0100
Subject: [PATCH 14/14] Fix doc fails

---
 doc/source/whatsnew/v0.25.0.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 560f841ec30b1..a2537a20058d4 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -98,17 +98,18 @@ Deprecations
 Currently, :meth:`DataFrame.set_index` accepts lists as meaning two different things - as a list of labels, and as an array-like collection of values.
 This ambiguity decides in favor of the list of labels, but nested lists are interpreted as arrays:
 
-.. ipython:: ipython
+.. ipython:: python
     :okwarning:
 
-    df = pd.DataFrame(np.reshape(np.arange(12), (3, 4)), columns=['a', 'b', 'c', 'd'])
+    df = pd.DataFrame(np.reshape(np.arange(12), (3, 4)),
+                      columns=['a', 'b', 'c', 'd'])
     df.set_index(['a', 'b', 'c'])
     df.set_index([['a', 'b', 'c']])
 
 The latter case has now been deprecated and will be removed in a future version. As a replacement,
 it is suggested to wrap the list in a :class:`Series`, :class:`Index`, ``np.array`` or an iterator.
 
-.. ipython:: ipython
+.. ipython:: python
 
     df.set_index(pd.Series(['a', 'b', 'c']))