From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 Aug 2020 21:19:10 -0700 Subject: [PATCH 01/71] REF: remove unnecesary try/except --- pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 166631e69f523..51532a75d2d4a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -31,7 +31,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -60,6 +60,7 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms +from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,32 +1035,31 @@ def _cython_agg_blocks( no_result = object() - def cast_result_block(result, block: "Block", how: str) -> "Block": - # see if we can cast the block to the desired dtype + def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: + # see if we can cast the values to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(block.dtype, how) + dtype = maybe_cast_result_dtype(values.dtype, how) result = maybe_downcast_numeric(result, dtype) - if block.is_extension and isinstance(result, np.ndarray): - # e.g. block.values was an IntegerArray - # (1, N) case can occur if block.values was Categorical + if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): + # e.g. values was an IntegerArray + # (1, N) case can occur if values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(block.values)._from_sequence( - result.ravel(), dtype=block.values.dtype + result = type(values)._from_sequence( + result.ravel(), dtype=values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - agg_block: "Block" = block.make_block(result) - return agg_block + return result def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - try: - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - except TypeError: - # we may have an exception in trying to aggregate - # continue and exclude the block - raise + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + + result = cast(DataFrame, result) + # unwrap DataFrame to get array + if len(result._mgr.blocks) != 1: + # We've split an object block! Everything we've assumed + # about a single block input returning a single block output + # is a lie. To keep the code-path for the typical non-split case + # clean, we choose to clean up this mess later on. 
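# A minimal runnable sketch (an aside, not part of this diff) of the
# cast-back step in cast_agg_result above: a cython aggregation over an
# extension-array-backed block can come back as a plain ndarray, and
# _from_sequence rebuilds the original EA dtype. The sample values are
# hypothetical; pandas 1.x behavior assumed.
import numpy as np
import pandas as pd

ea_values = pd.array([1, 2, None], dtype="Int64")  # e.g. block.values
agg_result = np.array([3.0])                       # e.g. what a cython "sum" returned
restored = type(ea_values)._from_sequence(agg_result.ravel(), dtype=ea_values.dtype)
print(restored.dtype)  # Int64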
+ assert len(locs) == result.shape[1] + for i, loc in enumerate(locs): + agg_block = result.iloc[:, [i]]._mgr.blocks[0] + agg_block.mgr_locs = [loc] + new_blocks.append(agg_block) else: - result = cast(DataFrame, result) - # unwrap DataFrame to get array - if len(result._mgr.blocks) != 1: - # We've split an object block! Everything we've assumed - # about a single block input returning a single block output - # is a lie. To keep the code-path for the typical non-split case - # clean, we choose to clean up this mess later on. - assert len(locs) == result.shape[1] - for i, loc in enumerate(locs): - agg_block = result.iloc[:, [i]]._mgr.blocks[0] - agg_block.mgr_locs = [loc] - new_blocks.append(agg_block) - else: - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - agg_block = cast_result_block(result, block, how) - new_blocks = [agg_block] + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) + new_blocks = [agg_block] else: - agg_block = cast_result_block(result, block, how) + res_values = cast_agg_result(result, block.values, how) + agg_block = block.make_block(res_values) new_blocks = [agg_block] return new_blocks From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001 From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com> Date: Fri, 21 Aug 2020 17:34:51 -0500 Subject: [PATCH 02/71] TST: add test for agg on ordered categorical cols (#35630) --- .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ce9d4b892d775..8fe450fe6abfc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1063,6 +1063,85 @@ def test_groupby_get_by_index(): pd.testing.assert_frame_equal(res, expected) +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}), + ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}), + ({"nr": "min"}, {"nr": [1, 5]}), + ], +) +def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): + # test single aggregations on ordered categorical cols GHGH27800 + + # create the result dataframe + input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + expected_df = pd.DataFrame(data=exp_data, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + +@pytest.mark.parametrize( + "grp_col_dict, exp_data", + [ + ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]), + ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]), + ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), + ], +) +def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): + # test combined aggregations on ordered categorical cols GH27800 + + # create the result dataframe + 
input_df = pd.DataFrame( + { + "nr": [1, 2, 3, 4, 5, 6, 7, 8], + "cat_ord": list("aabbccdd"), + "cat": list("aaaabbbb"), + } + ) + + input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) + input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() + result_df = input_df.groupby("cat").agg(grp_col_dict) + + # create expected dataframe + cat_index = pd.CategoricalIndex( + ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category" + ) + + # unpack the grp_col_dict to create the multi-index tuple + # this tuple will be used to create the expected dataframe index + multi_index_list = [] + for k, v in grp_col_dict.items(): + if isinstance(v, list): + for value in v: + multi_index_list.append([k, value]) + else: + multi_index_list.append([k, v]) + multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list)) + + expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index) + + tm.assert_frame_equal(result_df, expected_df) + + def test_nonagg_agg(): # GH 35490 - Single/Multiple agg of non-agg function give same results # TODO: agg should raise for functions that don't aggregate From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001 From: tkmz-n <60312218+tkmz-n@users.noreply.github.com> Date: Sat, 22 Aug 2020 07:42:50 +0900 Subject: [PATCH 03/71] TST: resample does not yield empty groups (#10603) (#35799) --- pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 0fbb60c176b30..3fa85e62d028c 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): tm.assert_index_equal(result.index, expected_index) assert result.index.freq == expected_index.freq assert not np.isnan(result[-1]) + + +def test_resample_with_timedelta_yields_no_empty_groups(): + # GH 10603 + df = pd.DataFrame( + np.random.normal(size=(10000, 4)), + index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"), + ) + result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) + + expected = pd.DataFrame( + [[768.0] * 4] * 12 + [[528.0] * 4], + index=pd.timedelta_range(start="1s", periods=13, freq="3s"), + ) + tm.assert_frame_equal(result, expected) From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 18:48:02 -0700 Subject: [PATCH 04/71] revert accidental rebase --- pandas/core/groupby/generic.py | 61 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4b1f6cfe0a662..60e23b14eaf09 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -30,7 +30,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import FrameOrSeries, FrameOrSeriesUnion from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.cast import ( @@ -59,7 +59,6 @@ validate_func_kwargs, ) import pandas.core.algorithms as algorithms -from pandas.core.arrays import ExtensionArray from pandas.core.base import DataError, SpecificationError import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype @@ -1034,31 +1033,32 @@ def _cython_agg_blocks( no_result = object() 
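# A minimal sketch (an aside, not part of this diff) of the sentinel
# pattern behind no_result above: a fresh object() is identity-unique, so
# it flags "no aggregation produced yet" without ever colliding with a
# real result value, None and NaN included.
sentinel = object()
result = sentinel
# ... attempt an aggregation that may leave result unset ...
if result is sentinel:
    pass  # exclude the block / fall back to another path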
- def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: - # see if we can cast the values to the desired dtype + def cast_result_block(result, block: "Block", how: str) -> "Block": + # see if we can cast the block to the desired dtype # this may not be the original dtype assert not isinstance(result, DataFrame) assert result is not no_result - dtype = maybe_cast_result_dtype(values.dtype, how) + dtype = maybe_cast_result_dtype(block.dtype, how) result = maybe_downcast_numeric(result, dtype) - if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray): - # e.g. values was an IntegerArray - # (1, N) case can occur if values was Categorical + if block.is_extension and isinstance(result, np.ndarray): + # e.g. block.values was an IntegerArray + # (1, N) case can occur if block.values was Categorical # and result is ndarray[object] # TODO(EA2D): special casing not needed with 2D EAs assert result.ndim == 1 or result.shape[0] == 1 try: # Cast back if feasible - result = type(values)._from_sequence( - result.ravel(), dtype=values.dtype + result = type(block.values)._from_sequence( + result.ravel(), dtype=block.values.dtype ) except (ValueError, TypeError): # reshape to be valid for non-Extension Block result = result.reshape(1, -1) - return result + agg_block: "Block" = block.make_block(result) + return agg_block def blk_func(block: "Block") -> List["Block"]: new_blocks: List["Block"] = [] @@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]: # Categoricals. This will done by later self._reindex_output() # Doing it here creates an error. See GH#34951 sgb = get_groupby(obj, self.grouper, observed=True) - result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - - assert isinstance(result, (Series, DataFrame)) # for mypy - # In the case of object dtype block, it may have been split - # in the operation. We un-split here. - result = result._consolidate() - assert isinstance(result, (Series, DataFrame)) # for mypy - assert len(result._mgr.blocks) == 1 - - # unwrap DataFrame to get array - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) - new_blocks = [agg_block] + try: + result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) + except TypeError: + # we may have an exception in trying to aggregate + # continue and exclude the block + raise + else: + assert isinstance(result, (Series, DataFrame)) # for mypy + # In the case of object dtype block, it may have been split + # in the operation. We un-split here. 
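# A minimal sketch (an aside, not part of this diff) of the un-splitting
# that _consolidate() performs below, written against the private
# _mgr/_consolidate internals the hunk itself uses; data is hypothetical.
# Adding a second object column leaves two object blocks until
# consolidation merges them back into one.
import pandas as pd

df = pd.DataFrame({"a": ["x", "y"]})
df["b"] = ["u", "v"]                       # appended as a separate object block
print(len(df._mgr.blocks))                 # 2
print(len(df._consolidate()._mgr.blocks))  # 1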
+ result = result._consolidate() + assert isinstance(result, (Series, DataFrame)) # for mypy + assert len(result._mgr.blocks) == 1 + + # unwrap DataFrame to get array + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + agg_block = cast_result_block(result, block, how) + new_blocks = [agg_block] else: - res_values = cast_agg_result(result, block.values, how) - agg_block = block.make_block(res_values) + agg_block = cast_result_block(result, block, how) new_blocks = [agg_block] return new_blocks From 51205a51dd75c791848c353e9af3d8b46aa4afd6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Aug 2020 18:51:45 -0700 Subject: [PATCH 05/71] REF/BUG: don't go through cython for EA indexes --- pandas/core/groupby/generic.py | 50 +++++++++++++++++++++++++++++----- pandas/core/groupby/ops.py | 5 ++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2afa56b50c3c7..36db78a77c511 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -74,7 +74,14 @@ get_groupby, ) from pandas.core.groupby.numba_ import generate_numba_func, split_for_numba -from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, + all_indexes_same, +) import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series @@ -262,17 +269,46 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if self.grouper.nkeys > 1: return self._python_agg_general(func, *args, **kwargs) - try: - return self._python_agg_general(func, *args, **kwargs) - except (ValueError, KeyError): - # TODO: KeyError is raised in _python_agg_general, - # see see test_groupby.test_basic + if isinstance( + self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex) + ): + # using _python_agg_general would end up incorrectly patching + # _index_data in reduction.pyx result = self._aggregate_named(func, *args, **kwargs) + else: + try: + return self._python_agg_general(func, *args, **kwargs) + except (ValueError, KeyError): + # TODO: KeyError is raised in _python_agg_general, + # see see test_groupby.test_basic + result = self._aggregate_named(func, *args, **kwargs) index = Index(sorted(result), name=self.grouper.names[0]) + if isinstance(index, (DatetimeIndex, TimedeltaIndex)): + # TODO: do we _always_ want to do this? + # shouldnt this be done later in eg _wrap_aggregated_output? + index = index._with_freq("infer") + + result_index = self.grouper.result_index + + if ( + result_index.dtype == index.dtype + and result_index.freq is not None + and index.freq is None + ): + # TODO: will dtype equality always hold? + if len(index) == 1: + index.freq = result_index.freq + + elif len(index) == 2: + if index[0] + result_index.freq == index[1]: + # infer_freq doesn't handle length-2 indexes + index.freq = result_index.freq + ret = create_series_with_explicit_dtype( result, index=index, dtype_if_empty=object ) + ret.name = self._selected_obj.name # test_metadata_propagation_indiv if not self.as_index: # pragma: no cover print("Warning, ignoring as_index=True") @@ -478,7 +514,7 @@ def _get_index() -> Index: def _aggregate_named(self, func, *args, **kwargs): result = {} - for name, group in self: + for name, group in self: # TODO: could we have duplicate names? 
group.name = name output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c6171a55359fe..66a9f1353d3c5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -672,6 +672,11 @@ def _aggregate_series_pure_python( # e.g. test_agg_lambda_with_timezone lambda e: e.head(1) # FIXME: are we potentially losing important res.index info? res = res.item() + elif group.dtype == object: + # TODO: is this at all right? + # e.g. test_agg_over_numpy_arrays where we have entries + # that are each ndarrays + pass else: raise ValueError("Function does not reduce") result = np.empty(ngroups, dtype="O") From f453c5b3c74a86d4012b9478a3b64204f7cd81dc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Aug 2020 20:46:43 -0700 Subject: [PATCH 06/71] Implement _aggregate_maybe_named --- pandas/core/groupby/generic.py | 30 +++++++++++++++++++++++++++--- pandas/core/groupby/ops.py | 5 ----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 36db78a77c511..9b72157ddd087 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -274,14 +274,14 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) ): # using _python_agg_general would end up incorrectly patching # _index_data in reduction.pyx - result = self._aggregate_named(func, *args, **kwargs) + result = self._aggregate_maybe_named(func, *args, **kwargs) else: try: return self._python_agg_general(func, *args, **kwargs) except (ValueError, KeyError): # TODO: KeyError is raised in _python_agg_general, # see see test_groupby.test_basic - result = self._aggregate_named(func, *args, **kwargs) + result = self._aggregate_maybe_named(func, *args, **kwargs) index = Index(sorted(result), name=self.grouper.names[0]) if isinstance(index, (DatetimeIndex, TimedeltaIndex)): @@ -511,11 +511,35 @@ def _get_index() -> Index: ) return self._reindex_output(result) + def _aggregate_maybe_named(self, func, *args, **kwargs): + """ + Try the named-aggregator first, then unnamed, which better matches + what libreduction does. + """ + try: + return self._aggregate_named(func, *args, **kwargs) + except KeyError: + return self._aggregate_unnamed(func, *args, **kwargs) + def _aggregate_named(self, func, *args, **kwargs): result = {} for name, group in self: # TODO: could we have duplicate names? - group.name = name + group.name = name # only difference vs _aggregate_unnamed + output = func(group, *args, **kwargs) + if isinstance(output, (Series, Index, np.ndarray)): + raise ValueError("Must produce aggregated value") + result[name] = output + + return result + + def _aggregate_unnamed(self, func, *args, **kwargs): + """ + Pure-python analogue of what _python_agg_general does. + """ + result = {} + + for name, group in self: # TODO: could we have duplicate names? output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): raise ValueError("Must produce aggregated value") diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 66a9f1353d3c5..c6171a55359fe 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -672,11 +672,6 @@ def _aggregate_series_pure_python( # e.g. test_agg_lambda_with_timezone lambda e: e.head(1) # FIXME: are we potentially losing important res.index info? res = res.item() - elif group.dtype == object: - # TODO: is this at all right? - # e.g. 
test_agg_over_numpy_arrays where we have entries - # that are each ndarrays - pass else: raise ValueError("Function does not reduce") result = np.empty(ngroups, dtype="O") From 2ae2124fab275218268b680f5d5ce9e4bbefebe9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Aug 2020 09:01:00 -0700 Subject: [PATCH 07/71] de-duplicate --- pandas/core/groupby/generic.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9b72157ddd087..7927a77141b3d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -517,32 +517,28 @@ def _aggregate_maybe_named(self, func, *args, **kwargs): what libreduction does. """ try: - return self._aggregate_named(func, *args, **kwargs) + return self._aggregate_named(func, *args, named=True, **kwargs) except KeyError: - return self._aggregate_unnamed(func, *args, **kwargs) + return self._aggregate_named(func, *args, named=False, **kwargs) - def _aggregate_named(self, func, *args, **kwargs): + def _aggregate_named(self, func, *args, named: bool = True, **kwargs): result = {} for name, group in self: # TODO: could we have duplicate names? - group.name = name # only difference vs _aggregate_unnamed - output = func(group, *args, **kwargs) - if isinstance(output, (Series, Index, np.ndarray)): - raise ValueError("Must produce aggregated value") - result[name] = output + if named: + group.name = name - return result - - def _aggregate_unnamed(self, func, *args, **kwargs): - """ - Pure-python analogue of what _python_agg_general does. - """ - result = {} - - for name, group in self: # TODO: could we have duplicate names? output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): - raise ValueError("Must produce aggregated value") + if ( + isinstance(output, Series) + and len(output) == 1 + and name in output.index + ): + # FIXME: kludge for test_resampler_grouper.test_apply + output = output.iloc[0] + else: + raise ValueError("Must produce aggregated value") result[name] = output return result From 98a91a321ff758682bff573ecee1b0bf2e0e6d2e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Aug 2020 14:48:49 -0700 Subject: [PATCH 08/71] avoid passing RangeIndex to libreduction --- pandas/core/groupby/ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c6171a55359fe..98e4539adbe24 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -45,7 +45,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -620,8 +620,10 @@ def agg_series( # TODO: can we get a performant workaround for EAs backed by ndarray? 
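# A minimal sketch (an aside, not part of this diff) of why the named path
# in _aggregate_named above sets group.name: with .agg, each group Series
# carries its group key as .name, so a UDF can read it. Data is
# hypothetical.
import pandas as pd

ser = pd.Series([1, 2, 3, 4])
out = ser.groupby([10, 10, 20, 20]).agg(lambda g: g.sum() * g.name)
print(out)  # key 10 -> 30, key 20 -> 140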
return self._aggregate_series_pure_python(obj, func) - elif obj.index._has_complex_internals: + elif obj.index._has_complex_internals or isinstance(obj.index, RangeIndex): # Preempt TypeError in _aggregate_series_fast + # exclude RangeIndex because patching it in libreduction would + # silently be incorrect return self._aggregate_series_pure_python(obj, func) try: From c230f72b502446ac3c4a65fe7c79c7314b158bb0 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Sep 2020 19:29:18 -0700 Subject: [PATCH 09/71] simplify --- pandas/core/groupby/generic.py | 23 ++--------------------- pandas/tests/resample/test_base.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 45833a882fc0f..20dfb3e8fddd8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -282,27 +282,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # see see test_groupby.test_basic result = self._aggregate_maybe_named(func, *args, **kwargs) - index = Index(sorted(result), name=self.grouper.names[0]) - if isinstance(index, (DatetimeIndex, TimedeltaIndex)): - # TODO: do we _always_ want to do this? - # shouldnt this be done later in eg _wrap_aggregated_output? - index = index._with_freq("infer") - - result_index = self.grouper.result_index - - if ( - result_index.dtype == index.dtype - and result_index.freq is not None - and index.freq is None - ): - # TODO: will dtype equality always hold? - if len(index) == 1: - index.freq = result_index.freq - - elif len(index) == 2: - if index[0] + result_index.freq == index[1]: - # infer_freq doesn't handle length-2 indexes - index.freq = result_index.freq + index = self.grouper.result_index + assert index.name == self.grouper.names[0] ret = create_series_with_explicit_dtype( result, index=index, dtype_if_empty=object diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 28d33ebb23c20..5827b1f456bd7 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -195,14 +195,17 @@ def test_resample_empty_dtypes(index, dtype, resample_method): @all_ts -def test_apply_to_empty_series(empty_series_dti): +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_apply_to_empty_series(empty_series_dti, freq): # GH 14313 s = empty_series_dti - for freq in ["M", "D", "H"]: - result = s.resample(freq).apply(lambda x: 1) - expected = s.resample(freq).apply(np.sum) - tm.assert_series_equal(result, expected, check_dtype=False) + result = s.resample(freq).apply(lambda x: 1) + expected = s.resample(freq).apply(np.sum) + + assert result.index.dtype == expected.index.dtype + + tm.assert_series_equal(result, expected, check_dtype=False) @all_ts From ba48381784fb7fe74c7042ded640661ede9a21e7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Jun 2020 12:35:37 -0700 Subject: [PATCH 10/71] REF: dont set ndarray.data in libreduction --- pandas/_libs/reduction.pyx | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 8161b5c5c2b11..84f53e0b061eb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -296,7 +296,7 @@ cdef class Slider: Only handles contiguous data for now """ cdef: - ndarray values, buf + ndarray values, buf, orig_buf Py_ssize_t stride, orig_len, orig_stride char *orig_data @@ -308,6 +308,7 @@ cdef class Slider: values = values.copy() self.values = values + 
self.orig_buf = buf self.buf = buf self.stride = values.strides[0] @@ -315,21 +316,14 @@ cdef class Slider: self.orig_len = self.buf.shape[0] self.orig_stride = self.buf.strides[0] - self.buf.data = self.values.data - self.buf.strides[0] = self.stride - cdef move(self, int start, int end): """ For slicing """ - self.buf.data = self.values.data + self.stride * start - self.buf.shape[0] = end - start + self.buf = self.values[start:end] cdef reset(self): - - self.buf.shape[0] = self.orig_len - self.buf.data = self.orig_data - self.buf.strides[0] = self.orig_stride + self.buf = self.orig_buf class InvalidApply(Exception): From e52db7dd031243a3c56219e2aef63e36804105ae Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 22 Jul 2020 09:30:04 -0700 Subject: [PATCH 11/71] less test failures --- pandas/_libs/reduction.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 84f53e0b061eb..09cbbe0602319 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -51,7 +51,7 @@ cdef class _BaseGrouper: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference # to a 1-d ndarray like datetime / timedelta / period. - object.__setattr__(cached_ityp, '_index_data', islider.buf) + object.__setattr__(cached_ityp, '_data', islider.buf) cached_ityp._engine.clear_mapping() cached_ityp._cache.clear() # e.g. inferred_freq must go object.__setattr__(cached_typ._mgr._block, 'values', vslider.buf) @@ -353,7 +353,7 @@ def apply_frame_axis0(object frame, object f, object names, slider.move(starts[i], ends[i]) item_cache.clear() # ugh - chunk = slider.dummy + chunk = slider.frame[starts[i]:ends[i]] object.__setattr__(chunk, 'name', names[i]) try: From 972359fd6543f6d6d0e33a18c5ea5006fe99958c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 Aug 2020 15:49:09 -0700 Subject: [PATCH 12/71] port solution from #35417 --- pandas/_libs/reduction.pyx | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 09cbbe0602319..4c7664833d967 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -400,7 +400,8 @@ cdef class BlockSlider: object frame, dummy, index int nblocks Slider idx_slider - list blocks + list blocks, blk_values + ndarray orig_blklocs, orig_blknos cdef: char **base_ptrs @@ -414,20 +415,27 @@ cdef class BlockSlider: self.dummy = frame[:0] self.index = self.dummy.index - self.blocks = [b.values for b in self.dummy._mgr.blocks] + # GH#35417 attributes we need to restore at each step in case + # the function modified them. + mgr = self.dummy._mgr + self.orig_blklocs = mgr.blklocs + self.orig_blknos = mgr.blknos + self.blocks = [x for x in self.dummy._mgr.blocks] - for x in self.blocks: + self.blk_values = [b.values for b in self.dummy._mgr.blocks] + + for x in self.blk_values: util.set_array_not_contiguous(x) - self.nblocks = len(self.blocks) + self.nblocks = len(self.blk_values) # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference to a 1-d # ndarray like datetime / timedelta / period. 
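# A minimal sketch (an aside, not part of this diff) of the view-based
# move()/reset() that the Slider changes above switch to: numpy basic
# slicing returns a view, so "moving" the buffer is just re-slicing the
# parent array, with no raw-pointer surgery to undo afterwards.
import numpy as np

values = np.arange(10)
orig_buf = values[:0]   # initial buffer
buf = values[3:6]       # move(3, 6): a view sharing memory with values
buf[0] = 99
print(values[3])        # 99 -- confirms buf is a view, not a copy
buf = orig_buf          # reset()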
self.idx_slider = Slider( self.frame.index._index_data, self.dummy.index._index_data) - self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) - for i, block in enumerate(self.blocks): + self.base_ptrs = malloc(sizeof(char*) * len(self.blk_values)) + for i, block in enumerate(self.blk_values): self.base_ptrs[i] = (block).data def __dealloc__(self): @@ -438,9 +446,11 @@ cdef class BlockSlider: ndarray arr Py_ssize_t i + self._restore_blocks() + # move blocks for i in range(self.nblocks): - arr = self.blocks[i] + arr = self.blk_values[i] # axis=1 is the frame's axis=0 arr.data = self.base_ptrs[i] + arr.strides[1] * start @@ -453,14 +463,25 @@ cdef class BlockSlider: self.index._engine.clear_mapping() self.index._cache.clear() # e.g. inferred_freq must go + cdef _restore_blocks(self): + """ + Ensure that we have the original blocks, blknos, and blklocs. + """ + mgr = self.dummy._mgr + mgr.blocks = self.blocks + mgr._blklocs = self.orig_blklocs + mgr._blknos = self.orig_blknos + cdef reset(self): cdef: ndarray arr Py_ssize_t i + self._restore_blocks() + # reset blocks for i in range(self.nblocks): - arr = self.blocks[i] + arr = self.blk_values[i] # axis=1 is the frame's axis=0 arr.data = self.base_ptrs[i] From 28f6ca58cae7ecf6ba3cf26e778e52e720667ae0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Aug 2020 19:54:45 -0700 Subject: [PATCH 13/71] dont pass Series with DTI to SeriesGrouper --- pandas/core/groupby/ops.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4ce81ac00ddd6..0688d90a1019d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -45,7 +45,14 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + RangeIndex, + TimedeltaIndex, + ensure_index, +) from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -616,7 +623,9 @@ def agg_series(self, obj: Series, func: F, *args, **kwargs): # TODO: can we get a performant workaround for EAs backed by ndarray? 
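# A minimal sketch (an aside, not part of this diff) of the GH#35417
# snapshot/restore done by _restore_blocks above, written against the
# private BlockManager attributes the hunk itself uses; pandas 1.1
# internals assumed, data hypothetical. The applied UDF can consolidate or
# reorder the dummy frame's blocks, so they are re-pinned at every step.
import pandas as pd

dummy = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
mgr = dummy._mgr
orig_blklocs, orig_blknos = mgr.blklocs, mgr.blknos  # snapshot once
orig_blocks = list(mgr.blocks)
# ... the user function may have mutated mgr in place here ...
mgr.blocks = orig_blocks       # restore before the next slide
mgr._blklocs = orig_blklocs
mgr._blknos = orig_blknos
print(len(mgr.blocks))         # 2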
return self._aggregate_series_pure_python(obj, func) - elif obj.index._has_complex_internals or isinstance(obj.index, RangeIndex): + elif obj.index._has_complex_internals or isinstance( + obj.index, (RangeIndex, DatetimeIndex, TimedeltaIndex) + ): # Preempt TypeError in _aggregate_series_fast # exclude RangeIndex because patching it in libreduction would # silently be incorrect From 0aa2a54ddabbb01e0f0178cd1b3ce960a19683cb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 4 Sep 2020 19:51:04 -0700 Subject: [PATCH 14/71] De-privatize (#36130) --- pandas/core/dtypes/dtypes.py | 4 +-- pandas/core/indexes/datetimes.py | 4 +-- pandas/core/indexing.py | 4 +-- pandas/core/util/hashing.py | 8 ++--- pandas/io/formats/format.py | 4 +-- pandas/io/formats/style.py | 20 ++++++------- pandas/plotting/_matplotlib/core.py | 29 +++++++++---------- pandas/plotting/_matplotlib/timeseries.py | 10 +++---- .../tests/indexing/multiindex/test_slice.py | 4 +-- pandas/tests/indexing/test_indexing.py | 12 ++++---- 10 files changed, 48 insertions(+), 51 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 8dc500dddeafa..e321fdd9b3a9b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -395,7 +395,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: from pandas.core.dtypes.common import DT64NS_DTYPE, is_datetime64tz_dtype from pandas.core.util.hashing import ( - _combine_hash_arrays, + combine_hash_arrays, hash_array, hash_tuples, ) @@ -427,7 +427,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: ) else: cat_array = [cat_array] - hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) + hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) return np.bitwise_xor.reduce(hashed) @classmethod diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6dcb9250812d0..3fd93a8159041 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -354,9 +354,9 @@ def _mpl_repr(self): @property def _formatter_func(self): - from pandas.io.formats.format import _get_format_datetime64 + from pandas.io.formats.format import get_format_datetime64 - formatter = _get_format_datetime64(is_dates_only=self._is_dates_only) + formatter = get_format_datetime64(is_dates_only=self._is_dates_only) return lambda x: f"'{formatter(x, tz=self.tz)}'" # -------------------------------------------------------------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cfb17b9498a36..fe2fec1c52063 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2291,7 +2291,7 @@ def need_slice(obj) -> bool: ) -def _non_reducing_slice(slice_): +def non_reducing_slice(slice_): """ Ensure that a slice doesn't reduce to a Series or Scalar. @@ -2330,7 +2330,7 @@ def pred(part) -> bool: return tuple(slice_) -def _maybe_numeric_slice(df, slice_, include_bool=False): +def maybe_numeric_slice(df, slice_, include_bool: bool = False): """ Want nice defaults for background_gradient that don't break with non-numeric data. But if slice_ is passed go with that. 
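A minimal sketch (an aside, not part of this patch) of the two slicing
helpers renamed above, assuming the post-rename import path; data is
hypothetical. non_reducing_slice wraps a key so .loc keeps a DataFrame
instead of reducing to a Series, which is what Styler relies on:

    import pandas as pd
    from pandas.core.indexing import maybe_numeric_slice, non_reducing_slice

    df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    print(df.loc[non_reducing_slice(0)])         # one-row DataFrame, not a Series
    print(maybe_numeric_slice(df, slice_=None))  # defaults to the numeric columns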
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index d79b9f4092325..df082c7285ae8 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -24,7 +24,7 @@ _default_hash_key = "0123456789123456" -def _combine_hash_arrays(arrays, num_items: int): +def combine_hash_arrays(arrays, num_items: int): """ Parameters ---------- @@ -108,7 +108,7 @@ def hash_pandas_object( for _ in [None] ) arrays = itertools.chain([h], index_iter) - h = _combine_hash_arrays(arrays, 2) + h = combine_hash_arrays(arrays, 2) h = Series(h, index=obj.index, dtype="uint64", copy=False) @@ -131,7 +131,7 @@ def hash_pandas_object( # keep `hashes` specifically a generator to keep mypy happy _hashes = itertools.chain(hashes, index_hash_generator) hashes = (x for x in _hashes) - h = _combine_hash_arrays(hashes, num_items) + h = combine_hash_arrays(hashes, num_items) h = Series(h, index=obj.index, dtype="uint64", copy=False) else: @@ -175,7 +175,7 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key): hashes = ( _hash_categorical(cat, encoding=encoding, hash_key=hash_key) for cat in vals ) - h = _combine_hash_arrays(hashes, len(vals)) + h = combine_hash_arrays(hashes, len(vals)) if is_tuple: h = h[0] diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3d441f6e737bc..3dc4290953360 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1624,7 +1624,7 @@ def _format_datetime64_dateonly( return x._date_repr -def _get_format_datetime64( +def get_format_datetime64( is_dates_only: bool, nat_rep: str = "NaT", date_format: None = None ) -> Callable: @@ -1656,7 +1656,7 @@ def _format_strings(self) -> List[str]: """ we by definition have a TZ """ values = self.values.astype(object) is_dates_only = _is_dates_only(values) - formatter = self.formatter or _get_format_datetime64( + formatter = self.formatter or get_format_datetime64( is_dates_only, date_format=self.date_format ) fmt_values = [formatter(x) for x in values] diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3bbb5271bce61..023557dd6494d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -36,7 +36,7 @@ import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice +from pandas.core.indexing import maybe_numeric_slice, non_reducing_slice jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") @@ -475,7 +475,7 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Style row_locs = range(len(self.data)) col_locs = range(len(self.data.columns)) else: - subset = _non_reducing_slice(subset) + subset = non_reducing_slice(subset) if len(subset) == 1: subset = subset, self.data.columns @@ -633,7 +633,7 @@ def _apply( **kwargs, ) -> "Styler": subset = slice(None) if subset is None else subset - subset = _non_reducing_slice(subset) + subset = non_reducing_slice(subset) data = self.data.loc[subset] if axis is not None: result = data.apply(func, axis=axis, result_type="expand", **kwargs) @@ -725,7 +725,7 @@ def _applymap(self, func: Callable, subset=None, **kwargs) -> "Styler": func = partial(func, **kwargs) # applymap doesn't take kwargs? 
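# A minimal sketch (an aside, not part of this diff) of the hashing helper
# made public above, using its post-rename import path; inputs are
# hypothetical. combine_hash_arrays folds several uint64 hash streams into
# one, which is how hash_pandas_object mixes index and column hashes.
import numpy as np
from pandas.core.util.hashing import combine_hash_arrays, hash_array

h1 = hash_array(np.array([1, 2, 3]))
h2 = hash_array(np.array(["x", "y", "z"], dtype=object))
combined = combine_hash_arrays(iter([h1, h2]), num_items=2)
print(combined.dtype, combined.shape)  # uint64 (3,)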
if subset is None: subset = pd.IndexSlice[:] - subset = _non_reducing_slice(subset) + subset = non_reducing_slice(subset) result = self.data.loc[subset].applymap(func) self._update_ctx(result) return self @@ -985,7 +985,7 @@ def hide_columns(self, subset) -> "Styler": ------- self : Styler """ - subset = _non_reducing_slice(subset) + subset = non_reducing_slice(subset) hidden_df = self.data.loc[subset] self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) return self @@ -1087,8 +1087,8 @@ def background_gradient( of the data is extended by ``low * (x.max() - x.min())`` and ``high * (x.max() - x.min())`` before normalizing. """ - subset = _maybe_numeric_slice(self.data, subset) - subset = _non_reducing_slice(subset) + subset = maybe_numeric_slice(self.data, subset) + subset = non_reducing_slice(subset) self.apply( self._background_gradient, cmap=cmap, @@ -1322,8 +1322,8 @@ def bar( "(eg: color=['#d65f5f', '#5fba7d'])" ) - subset = _maybe_numeric_slice(self.data, subset) - subset = _non_reducing_slice(subset) + subset = maybe_numeric_slice(self.data, subset) + subset = non_reducing_slice(subset) self.apply( self._bar, subset=subset, @@ -1390,7 +1390,7 @@ def _highlight_handler( axis: Optional[Axis] = None, max_: bool = True, ) -> "Styler": - subset = _non_reducing_slice(_maybe_numeric_slice(self.data, subset)) + subset = non_reducing_slice(maybe_numeric_slice(self.data, subset)) self.apply( self._highlight_extrema, color=color, axis=axis, subset=subset, max_=max_ ) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 147e4efd74bc3..c1ba7881165f1 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -33,6 +33,13 @@ from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters from pandas.plotting._matplotlib.style import get_standard_colors +from pandas.plotting._matplotlib.timeseries import ( + decorate_axes, + format_dateaxis, + maybe_convert_index, + maybe_resample, + use_dynamic_x, +) from pandas.plotting._matplotlib.tools import ( create_subplots, flatten_axes, @@ -1074,15 +1081,11 @@ def _is_ts_plot(self) -> bool: return not self.x_compat and self.use_index and self._use_dynamic_x() def _use_dynamic_x(self): - from pandas.plotting._matplotlib.timeseries import _use_dynamic_x - - return _use_dynamic_x(self._get_ax(0), self.data) + return use_dynamic_x(self._get_ax(0), self.data) def _make_plot(self): if self._is_ts_plot(): - from pandas.plotting._matplotlib.timeseries import _maybe_convert_index - - data = _maybe_convert_index(self._get_ax(0), self.data) + data = maybe_convert_index(self._get_ax(0), self.data) x = data.index # dummy, not used plotf = self._ts_plot @@ -1142,24 +1145,18 @@ def _plot( @classmethod def _ts_plot(cls, ax: "Axes", x, data, style=None, **kwds): - from pandas.plotting._matplotlib.timeseries import ( - _decorate_axes, - _maybe_resample, - format_dateaxis, - ) - # accept x to be consistent with normal plot func, # x is not passed to tsplot as it uses data.index as x coordinate # column_num must be in kwds for stacking purpose - freq, data = _maybe_resample(data, ax, kwds) + freq, data = maybe_resample(data, ax, kwds) # Set ax with freq info - _decorate_axes(ax, freq, kwds) + decorate_axes(ax, freq, kwds) # digging deeper if hasattr(ax, "left_ax"): - _decorate_axes(ax.left_ax, freq, kwds) + decorate_axes(ax.left_ax, freq, kwds) if hasattr(ax, "right_ax"): - _decorate_axes(ax.right_ax, 
freq, kwds) + decorate_axes(ax.right_ax, freq, kwds) ax._plot_data.append((data, cls._kind, kwds)) lines = cls._plot(ax, data.index, data.values, style=style, **kwds) diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index fd89a093d25a4..f8faac6a6a026 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -32,7 +32,7 @@ # Plotting functions and monkey patches -def _maybe_resample(series: "Series", ax: "Axes", kwargs): +def maybe_resample(series: "Series", ax: "Axes", kwargs): # resample against axes freq if necessary freq, ax_freq = _get_freq(ax, series) @@ -105,7 +105,7 @@ def _replot_ax(ax: "Axes", freq, kwargs): ax._plot_data = [] ax.clear() - _decorate_axes(ax, freq, kwargs) + decorate_axes(ax, freq, kwargs) lines = [] labels = [] @@ -128,7 +128,7 @@ def _replot_ax(ax: "Axes", freq, kwargs): return lines, labels -def _decorate_axes(ax: "Axes", freq, kwargs): +def decorate_axes(ax: "Axes", freq, kwargs): """Initialize axes for time-series plotting""" if not hasattr(ax, "_plot_data"): ax._plot_data = [] @@ -193,7 +193,7 @@ def _get_freq(ax: "Axes", series: "Series"): return freq, ax_freq -def _use_dynamic_x(ax: "Axes", data: FrameOrSeriesUnion) -> bool: +def use_dynamic_x(ax: "Axes", data: FrameOrSeriesUnion) -> bool: freq = _get_index_freq(data.index) ax_freq = _get_ax_freq(ax) @@ -235,7 +235,7 @@ def _get_index_freq(index: "Index") -> Optional[BaseOffset]: return freq -def _maybe_convert_index(ax: "Axes", data): +def maybe_convert_index(ax: "Axes", data): # tsplot converts automatically, but don't want to convert index # over and over for DataFrames if isinstance(data.index, (ABCDatetimeIndex, ABCPeriodIndex)): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 532bb4f2e6dac..ec0391a2ccc26 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -6,7 +6,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp import pandas._testing as tm -from pandas.core.indexing import _non_reducing_slice +from pandas.core.indexing import non_reducing_slice from pandas.tests.indexing.common import _mklbl @@ -739,7 +739,7 @@ def test_non_reducing_slice_on_multiindex(self): df = pd.DataFrame(dic, index=[0, 1]) idx = pd.IndexSlice slice_ = idx[:, idx["b", "d"]] - tslice_ = _non_reducing_slice(slice_) + tslice_ = non_reducing_slice(slice_) result = df.loc[tslice_] expected = pd.DataFrame({("b", "d"): [4, 1]}) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 5b7f013d5de31..a080c5d169215 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -12,7 +12,7 @@ import pandas as pd from pandas import DataFrame, Index, NaT, Series import pandas._testing as tm -from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice +from pandas.core.indexing import maybe_numeric_slice, non_reducing_slice from pandas.tests.indexing.common import _mklbl # ------------------------------------------------------------------------ @@ -822,7 +822,7 @@ def test_range_in_series_indexing(self, size): def test_non_reducing_slice(self, slc): df = DataFrame([[0, 1], [2, 3]]) - tslice_ = _non_reducing_slice(slc) + tslice_ = non_reducing_slice(slc) assert isinstance(df.loc[tslice_], DataFrame) def test_list_slice(self): @@ -831,18 +831,18 @@ def test_list_slice(self): df = 
DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) expected = pd.IndexSlice[:, ["A"]] for subset in slices: - result = _non_reducing_slice(subset) + result = non_reducing_slice(subset) tm.assert_frame_equal(df.loc[result], df.loc[expected]) def test_maybe_numeric_slice(self): df = DataFrame({"A": [1, 2], "B": ["c", "d"], "C": [True, False]}) - result = _maybe_numeric_slice(df, slice_=None) + result = maybe_numeric_slice(df, slice_=None) expected = pd.IndexSlice[:, ["A"]] assert result == expected - result = _maybe_numeric_slice(df, None, include_bool=True) + result = maybe_numeric_slice(df, None, include_bool=True) expected = pd.IndexSlice[:, ["A", "C"]] - result = _maybe_numeric_slice(df, [1]) + result = maybe_numeric_slice(df, [1]) expected = [1] assert result == expected From 0164b8a85366cfc8b696dc0b41a5da11ca5995e2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 5 Sep 2020 03:56:30 +0100 Subject: [PATCH 15/71] TYP: misc fixes for numpy types (#36098) --- pandas/_typing.py | 2 +- pandas/core/algorithms.py | 7 +++---- pandas/core/arrays/categorical.py | 2 +- pandas/core/construction.py | 6 ++++-- pandas/core/dtypes/cast.py | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f8af92e07c674..74bfc9134c3af 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -62,7 +62,7 @@ # other Dtype = Union[ - "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]] + "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]] ] DtypeObj = Union[np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9d75d21c5637a..f297c7165208f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,7 @@ import operator from textwrap import dedent -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast from warnings import catch_warnings, simplefilter, warn import numpy as np @@ -60,7 +60,7 @@ from pandas.core.indexers import validate_indices if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import Categorical, DataFrame, Series _shared_docs: Dict[str, str] = {} @@ -429,8 +429,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: if is_categorical_dtype(comps): # TODO(extension) # handle categoricals - # error: "ExtensionArray" has no attribute "isin" [attr-defined] - return comps.isin(values) # type: ignore[attr-defined] + return cast("Categorical", comps).isin(values) comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 27b1afdb438cb..ec85ec47d625c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2316,7 +2316,7 @@ def _concat_same_type(self, to_concat): return union_categoricals(to_concat) - def isin(self, values): + def isin(self, values) -> np.ndarray: """ Check whether `values` are contained in Categorical. 
diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 02b8ed17244cd..9d6c2789af25b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -335,7 +335,7 @@ def array( return result -def extract_array(obj, extract_numpy: bool = False): +def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike: """ Extract the ndarray or ExtensionArray from a Series or Index. @@ -383,7 +383,9 @@ def extract_array(obj, extract_numpy: bool = False): if extract_numpy and isinstance(obj, ABCPandasArray): obj = obj.to_numpy() - return obj + # error: Incompatible return value type (got "Index", expected "ExtensionArray") + # error: Incompatible return value type (got "Series", expected "ExtensionArray") + return obj # type: ignore[return-value] def sanitize_array( diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1489e08d82bf0..7c5aafcbbc7e9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1488,7 +1488,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: if has_bools: for t in types: if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t): - return object + return np.dtype("object") return np.find_common_type(types, []) @@ -1550,7 +1550,7 @@ def construct_1d_arraylike_from_scalar( elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): # we need to coerce to object dtype to avoid # to allow numpy to take our string as a scalar value - dtype = object + dtype = np.dtype("object") if not isna(value): value = ensure_str(value) From 48b5847f71c0e8935ae1fb3e78bdeaa47871461d Mon Sep 17 00:00:00 2001 From: Jonathan Shreckengost Date: Fri, 4 Sep 2020 23:10:49 -0400 Subject: [PATCH 16/71] Comma cleanup (#36082) --- .../tests/indexes/datetimes/test_datetime.py | 2 +- .../tests/indexes/datetimes/test_timezones.py | 2 +- .../tests/indexes/multi/test_constructors.py | 6 +++--- pandas/tests/indexes/multi/test_isin.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- .../indexes/timedeltas/test_scalar_compat.py | 8 ++++---- .../indexes/timedeltas/test_searchsorted.py | 2 +- pandas/tests/indexing/common.py | 4 +--- pandas/tests/indexing/test_callable.py | 18 ++++++------------ pandas/tests/indexing/test_check_indexer.py | 8 +++----- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/indexing/test_floats.py | 14 ++++---------- 12 files changed, 27 insertions(+), 43 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 7bb1d98086a91..8e2ac4feb7ded 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -51,7 +51,7 @@ def test_reindex_with_same_tz(self): "2010-01-02 00:00:00", ] expected1 = DatetimeIndex( - expected_list1, dtype="datetime64[ns, UTC]", freq=None, + expected_list1, dtype="datetime64[ns, UTC]", freq=None ) expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp")) tm.assert_index_equal(result1, expected1) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index ea68e8759c123..233835bb4b5f7 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -799,7 +799,7 @@ def test_dti_from_tzaware_datetime(self, tz): @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) def test_dti_tz_constructors(self, tzstr): - """ Test different DatetimeIndex constructions with timezone + """Test different 
DatetimeIndex constructions with timezone Follow-up of GH#4229 """ arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"] diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 1157c7f8bb962..16af884c89e9e 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -741,18 +741,18 @@ def test_raise_invalid_sortorder(): with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): MultiIndex( - levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2, + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2 ) with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): MultiIndex( - levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1, + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1 ) def test_datetimeindex(): idx1 = pd.DatetimeIndex( - ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo", + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" ) idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") idx = MultiIndex.from_arrays([idx1, idx2]) diff --git a/pandas/tests/indexes/multi/test_isin.py b/pandas/tests/indexes/multi/test_isin.py index 122263e6ec198..b369b9a50954e 100644 --- a/pandas/tests/indexes/multi/test_isin.py +++ b/pandas/tests/indexes/multi/test_isin.py @@ -78,7 +78,7 @@ def test_isin_level_kwarg(): @pytest.mark.parametrize( "labels,expected,level", [ - ([("b", np.nan)], np.array([False, False, True]), None,), + ([("b", np.nan)], np.array([False, False, True]), None), ([np.nan, "a"], np.array([True, True, False]), 0), (["d", np.nan], np.array([False, True, True]), 1), ], diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aee4b16621b4d..7720db9d98ebf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2426,7 +2426,7 @@ def test_index_with_tuple_bool(self): # TODO: remove tupleize_cols=False once correct behaviour is restored # TODO: also this op right now produces FutureWarning from numpy idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) - result = idx == ("c", "a",) + result = idx == ("c", "a") expected = np.array([False, False, True]) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index 16c19b8d00380..6a2238d90b590 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -104,18 +104,18 @@ def test_round(self): "L", t1a, TimedeltaIndex( - ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] ), ), ( "S", t1a, TimedeltaIndex( - ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"] ), ), - ("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"],),), - ("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"],),), + ("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), + ("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])), ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), ]: diff --git a/pandas/tests/indexes/timedeltas/test_searchsorted.py b/pandas/tests/indexes/timedeltas/test_searchsorted.py index 
4806a9acff96f..3cf45931cf6b7 100644 --- a/pandas/tests/indexes/timedeltas/test_searchsorted.py +++ b/pandas/tests/indexes/timedeltas/test_searchsorted.py @@ -17,7 +17,7 @@ def test_searchsorted_different_argument_classes(self, klass): tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( - "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2], + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] ) def test_searchsorted_invalid_argument_dtype(self, arg): idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 9cc031001f81c..656d25bec2a6b 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -144,9 +144,7 @@ def check_values(self, f, func, values=False): tm.assert_almost_equal(result, expected) - def check_result( - self, method, key, typs=None, axes=None, fails=None, - ): + def check_result(self, method, key, typs=None, axes=None, fails=None): def _eq(axis, obj, key): """ compare equal for these 2 keys """ axified = _axify(obj, key, axis) diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index 621417eb38d94..bf51c3e5d1695 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,15 +17,11 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 - tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 + res = df.loc[lambda x: x.A > 2] # noqa: E231 + tm.assert_frame_equal(res, df.loc[df.A > 2]) # noqa: E231 - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 - tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 + res = df.loc[lambda x: x.A > 2] # noqa: E231 + tm.assert_frame_equal(res, df.loc[df.A > 2]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] tm.assert_frame_equal(res, df.loc[df.B == "b", :]) @@ -94,10 +90,8 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[ - lambda x: ["A", "C"], - ] # noqa: E231 - tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 + res = df.loc[lambda x: ["A", "C"]] # noqa: E231 + tm.assert_frame_equal(res, df.loc[["A", "C"]]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] tm.assert_frame_equal(res, df.loc[["A", "C"], :]) diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py index 69d4065234d93..865ecb129cdfa 100644 --- a/pandas/tests/indexing/test_check_indexer.py +++ b/pandas/tests/indexing/test_check_indexer.py @@ -32,7 +32,7 @@ def test_valid_input(indexer, expected): @pytest.mark.parametrize( - "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")] ) def test_boolean_na_returns_indexer(indexer): # https://github.com/pandas-dev/pandas/issues/31503 @@ -61,7 +61,7 @@ def test_bool_raise_length(indexer): @pytest.mark.parametrize( - "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")], + "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")] ) def test_int_raise_missing_values(indexer): array = np.array([1, 2, 3]) @@ -89,9 +89,7 @@ def test_raise_invalid_array_dtypes(indexer): check_array_indexer(array, indexer) -@pytest.mark.parametrize( - "indexer", [None, Ellipsis, 
slice(0, 3), (None,)], -) +@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)]) def test_pass_through_non_array_likes(indexer): array = np.array([1, 2, 3]) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 1c5f00ff754a4..752ecd47fe089 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -87,7 +87,7 @@ def _assert_setitem_series_conversion( # tm.assert_series_equal(temp, expected_series) @pytest.mark.parametrize( - "val,exp_dtype", [(1, object), (1.1, object), (1 + 1j, object), (True, object)], + "val,exp_dtype", [(1, object), (1.1, object), (1 + 1j, object), (True, object)] ) def test_setitem_series_object(self, val, exp_dtype): obj = pd.Series(list("abcd")) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 18b9898e7d800..c48e0a129e161 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -181,9 +181,7 @@ def test_scalar_with_mixed(self): expected = 3 assert result == expected - @pytest.mark.parametrize( - "index_func", [tm.makeIntIndex, tm.makeRangeIndex], - ) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) @pytest.mark.parametrize("klass", [Series, DataFrame]) def test_scalar_integer(self, index_func, klass): @@ -405,7 +403,7 @@ def test_slice_integer(self): @pytest.mark.parametrize("l", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) def test_integer_positional_indexing(self, l): - """ make sure that we are raising on positional indexing + """make sure that we are raising on positional indexing w.r.t. an integer index """ s = Series(range(2, 6), index=range(2, 6)) @@ -425,9 +423,7 @@ def test_integer_positional_indexing(self, l): with pytest.raises(TypeError, match=msg): s.iloc[l] - @pytest.mark.parametrize( - "index_func", [tm.makeIntIndex, tm.makeRangeIndex], - ) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) def test_slice_integer_frame_getitem(self, index_func): # similar to above, but on the getitem dim (of a DataFrame) @@ -486,9 +482,7 @@ def test_slice_integer_frame_getitem(self, index_func): s[l] @pytest.mark.parametrize("l", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) - @pytest.mark.parametrize( - "index_func", [tm.makeIntIndex, tm.makeRangeIndex], - ) + @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) def test_float_slice_getitem_with_integer_index_raises(self, l, index_func): # similar to above, but on the getitem dim (of a DataFrame) From 4abfaea1a6212162f3059e164ebd7bdcd28c9ae4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 4 Sep 2020 20:11:39 -0700 Subject: [PATCH 17/71] CLN: remove unused args/kwargs (#36129) --- pandas/core/groupby/generic.py | 1 + pandas/core/groupby/groupby.py | 2 ++ pandas/core/groupby/ops.py | 8 ++++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0a04ac54a0c66..01c186473a065 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1121,6 +1121,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike: assert how == "ohlc" raise + # We get here with a) EADtypes and b) object dtype obj: Union[Series, DataFrame] # call our grouper again with only this block if isinstance(bvalues, ExtensionArray): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 651af2d314251..6ef2e67030881 100644 --- 
a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1012,6 +1012,8 @@ def _agg_general( # raised in _get_cython_function, in some cases can # be trimmed by implementing cython funcs for more dtypes pass + else: + raise # apply a non-cython aggregation result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 0688d90a1019d..3ba3c8a0eddc8 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -608,7 +608,7 @@ def _transform( return result - def agg_series(self, obj: Series, func: F, *args, **kwargs): + def agg_series(self, obj: Series, func: F): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 @@ -660,7 +660,7 @@ def _aggregate_series_fast(self, obj: Series, func: F): result, counts = grouper.get_result() return result, counts - def _aggregate_series_pure_python(self, obj: Series, func: F, *args, **kwargs): + def _aggregate_series_pure_python(self, obj: Series, func: F): group_index, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) @@ -669,7 +669,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F, *args, **kwargs): splitter = get_splitter(obj, group_index, ngroups, axis=0) for label, group in splitter: - res = func(group, *args, **kwargs) + res = func(group) if result is None: if isinstance(res, (Series, Index, np.ndarray)): @@ -846,7 +846,7 @@ def groupings(self) -> "List[grouper.Grouping]": for lvl, name in zip(self.levels, self.names) ] - def agg_series(self, obj: Series, func: F, *args, **kwargs): + def agg_series(self, obj: Series, func: F): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result From 3b4be0243326f969d8b4227caa1ceb12706d6f07 Mon Sep 17 00:00:00 2001 From: David Kwong Date: Sat, 5 Sep 2020 13:15:03 +1000 Subject: [PATCH 18/71] BUG: Fix DataFrame.groupby().apply() for NaN groups with dropna=False (#35951) --- doc/source/whatsnew/v1.2.0.rst | 3 +- pandas/core/reshape/concat.py | 6 ++- pandas/tests/groupby/test_groupby_dropna.py | 53 +++++++++++++++++++++ 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e65daa439a225..aa3255e673797 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -214,7 +214,8 @@ Performance improvements Bug fixes ~~~~~~~~~ - +- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) +- Categorical ^^^^^^^^^^^ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 299b68c6e71e0..9b94dae8556f6 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.missing import isna from pandas.core.arrays.categorical import ( factorize_from_iterable, @@ -624,10 +625,11 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde for hlevel, level in zip(zipped, levels): to_concat = [] for key, index in zip(hlevel, indexes): - mask = level == key + # Find matching codes, include matching nan values as equal. 
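+ # Note: NaN never compares equal to itself (np.nan == np.nan is False),
+ # so the old ``mask = level == key`` produced an all-False mask for the
+ # NaN group that ``groupby(..., dropna=False)`` introduces.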
+ mask = (isna(level) & isna(key)) | (level == key)
if not mask.any():
raise ValueError(f"Key {key} not in level {level}")
- i = np.nonzero(level == key)[0][0]
+ i = np.nonzero(mask)[0][0]
to_concat.append(np.repeat(i, len(index)))
codes_list.append(np.concatenate(to_concat))
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index d1501111cb22b..66db06eeebdfb 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -274,3 +274,56 @@ def test_groupby_dropna_datetime_like_data(
expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt"))
tm.assert_frame_equal(grouped, expected)
+
+
+@pytest.mark.parametrize(
+ "dropna, data, selected_data, levels",
+ [
+ pytest.param(
+ False,
+ {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+ {"values": [0, 1, 0, 0]},
+ ["a", "b", np.nan],
+ id="dropna_false_has_nan",
+ ),
+ pytest.param(
+ True,
+ {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+ {"values": [0, 1, 0]},
+ None,
+ id="dropna_true_has_nan",
+ ),
+ pytest.param(
+ # no nan in "groups"; dropna=True|False should be same.
+ False,
+ {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+ {"values": [0, 1, 0, 0]},
+ None,
+ id="dropna_false_no_nan",
+ ),
+ pytest.param(
+ # no nan in "groups"; dropna=True|False should be same.
+ True,
+ {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+ {"values": [0, 1, 0, 0]},
+ None,
+ id="dropna_true_no_nan",
+ ),
+ ],
+)
+def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels):
+ # GH 35889
+
+ df = pd.DataFrame(data)
+ gb = df.groupby("groups", dropna=dropna)
+ result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
+
+ mi_tuples = tuple(zip(data["groups"], selected_data["values"]))
+ mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None])
+ # MultiIndex.from_* currently drops NA from the levels, so the NA level
+ # has to be added back manually afterwards.
+ if not dropna and levels:
+ mi = mi.set_levels(levels, level="groups")
+
+ expected = pd.DataFrame(selected_data, index=mi)
+ tm.assert_frame_equal(result, expected)
From fb18f47bce85d6090ad34d9788684387024fb98d Mon Sep 17 00:00:00 2001
From: patrick <61934744+phofl@users.noreply.github.com>
Date: Sat, 5 Sep 2020 05:18:12 +0200
Subject: [PATCH 19/71] Bug 29764 groupby loses index name sometimes (#36121)

---
doc/source/whatsnew/v1.2.0.rst | 1 +
pandas/core/groupby/generic.py | 1 +
pandas/tests/groupby/test_groupby.py | 23 +++++++++++++++++++++
3 files changed, 25 insertions(+)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index aa3255e673797..3b252202c14c5 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -312,6 +312,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values.
(:issue:`9959`)
- Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
+- Bug in :meth:`DataFrame.groupby` not always maintaining the column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
- Reshaping
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 01c186473a065..9fda3c7a6b354 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1732,6 +1732,7 @@ def _wrap_transformed_output(
"""
indexed_output = {key.position: val for key, val in output.items()}
columns = Index(key.label for key in output)
+ columns.name = self.obj.columns.name
result = self.obj._constructor(indexed_output)
result.columns = columns
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index eec9e8064d584..e0196df7ceac0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2111,3 +2111,26 @@ def test_subsetting_columns_keeps_attrs(klass, attr, value):
expected = df.groupby("a", **{attr: value})
result = expected[["b"]] if klass is DataFrame else expected["b"]
assert getattr(result, attr) == getattr(expected, attr)
+
+
+@pytest.mark.parametrize("func", ["sum", "any", "shift"])
+def test_groupby_column_index_name_lost(func):
+ # GH: 29764 groupby loses index name sometimes
+ expected = pd.Index(["a"], name="idx")
+ df = pd.DataFrame([[1]], columns=expected)
+ df_grouped = df.groupby([1])
+ result = getattr(df_grouped, func)().columns
+ tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("func", ["ffill", "bfill"])
+def test_groupby_column_index_name_lost_fill_funcs(func):
+ # GH: 29764 groupby loses index name sometimes
+ df = pd.DataFrame(
+ [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]],
+ columns=pd.Index(["type", "a", "b"], name="idx"),
+ )
+ df_grouped = df.groupby(["type"])[["a", "b"]]
+ result = getattr(df_grouped, func)().columns
+ expected = pd.Index(["a", "b"], name="idx")
+ tm.assert_index_equal(result, expected)
From d26090d035616ef2879ecc28d9497c8f64040537 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Sat, 5 Sep 2020 04:18:59 +0100
Subject: [PATCH 20/71] STY: add code check for use of builtin filter function (#36089)

---
ci/code_checks.sh | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2e0f27fefca0b..6006d09bc3e78 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -179,6 +179,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
+ MSG='Check for use of builtin filter function' ; echo $MSG
+ invgrep -R --include="*.py" -P '(?
Date: Fri, 4 Sep 2020 20:21:49 -0700 Subject: [PATCH 21/71] BUG: df.replace with numeric values and str to_replace (#36093) --- doc/source/user_guide/missing_data.rst | 26 ----- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/array_algos/replace.py | 95 ++++++++++++++++++ pandas/core/generic.py | 14 --- pandas/core/internals/blocks.py | 27 ++++- pandas/core/internals/managers.py | 104 +------------------- pandas/tests/frame/methods/test_replace.py | 15 ++- pandas/tests/series/methods/test_replace.py | 5 +- 8 files changed, 136 insertions(+), 151 deletions(-) create mode 100644 pandas/core/array_algos/replace.py diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 2e68a0598bb71..28206192dd161 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -689,32 +689,6 @@ You can also operate on the DataFrame in place: df.replace(1.5, np.nan, inplace=True) -.. warning:: - - When replacing multiple ``bool`` or ``datetime64`` objects, the first - argument to ``replace`` (``to_replace``) must match the type of the value - being replaced. For example, - - .. code-block:: python - - >>> s = pd.Series([True, False, True]) - >>> s.replace({'a string': 'new value', True: False}) # raises - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - will raise a ``TypeError`` because one of the ``dict`` keys is not of the - correct type for replacement. - - However, when replacing a *single* object such as, - - .. ipython:: python - - s = pd.Series([True, False, True]) - s.replace('a string', 'another string') - - the original ``NDFrame`` object will be returned untouched. We're working on - unifying this API, but for backwards compatibility reasons we cannot break - the latter behavior. See :issue:`6354` for more details. - Missing data casting rules and indexing --------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3b252202c14c5..8b28a4439e1da 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -339,6 +339,7 @@ ExtensionArray Other ^^^^^ - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py new file mode 100644 index 0000000000000..6ac3cc1f9f2fe --- /dev/null +++ b/pandas/core/array_algos/replace.py @@ -0,0 +1,95 @@ +""" +Methods used by Block.replace and related methods. +""" +import operator +import re +from typing import Optional, Pattern, Union + +import numpy as np + +from pandas._typing import ArrayLike, Scalar + +from pandas.core.dtypes.common import ( + is_datetimelike_v_numeric, + is_numeric_v_string_like, + is_scalar, +) +from pandas.core.dtypes.missing import isna + + +def compare_or_regex_search( + a: ArrayLike, + b: Union[Scalar, Pattern], + regex: bool = False, + mask: Optional[ArrayLike] = None, +) -> Union[ArrayLike, bool]: + """ + Compare two array_like inputs of the same shape or two scalar values + + Calls operator.eq or re.search, depending on regex argument. If regex is + True, perform an element-wise regex matching. 
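+ With ``regex=True``, elements that are not strings never match.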
+ + Parameters + ---------- + a : array_like + b : scalar or regex pattern + regex : bool, default False + mask : array_like or None (default) + + Returns + ------- + mask : array_like of bool + """ + + def _check_comparison_types( + result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern] + ): + """ + Raises an error if the two arrays (a,b) cannot be compared. + Otherwise, returns the comparison result as expected. + """ + if is_scalar(result) and isinstance(a, np.ndarray): + type_names = [type(a).__name__, type(b).__name__] + + if isinstance(a, np.ndarray): + type_names[0] = f"ndarray(dtype={a.dtype})" + + raise TypeError( + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" + ) + + if not regex: + op = lambda x: operator.eq(x, b) + else: + op = np.vectorize( + lambda x: bool(re.search(b, x)) + if isinstance(x, str) and isinstance(b, (str, Pattern)) + else False + ) + + # GH#32621 use mask to avoid comparing to NAs + if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): + mask = np.reshape(~(isna(a)), a.shape) + if isinstance(a, np.ndarray): + a = a[mask] + + if is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + return np.zeros(a.shape, dtype=bool) + + elif is_datetimelike_v_numeric(a, b): + # GH#29553 avoid deprecation warnings from numpy + _check_comparison_types(False, a, b) + return False + + result = op(a) + + if isinstance(result, np.ndarray) and mask is not None: + # The shape of the mask can differ to that of the result + # since we may compare only a subset of a's or b's elements + tmp = np.zeros(mask.shape, dtype=np.bool_) + tmp[mask] = result + result = tmp + + _check_comparison_types(result, a, b) + return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2af323ccc1dd3..93c945638a174 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6561,20 +6561,6 @@ def replace( 1 new new 2 bait xyz - Note that when replacing multiple ``bool`` or ``datetime64`` objects, - the data types in the `to_replace` parameter must match the data - type of the value being replaced: - - >>> df = pd.DataFrame({{'A': [True, False, True], - ... 'B': [False, True, False]}}) - >>> df.replace({{'a string': 'new value', True: False}}) # raises - Traceback (most recent call last): - ... - TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' - - This raises a ``TypeError`` because one of the ``dict`` keys is not of - the correct type for replacement. 
- Compare the behavior of ``s.replace({{'a': None}})`` and ``s.replace('a', None)`` to understand the peculiarities of the `to_replace` parameter: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b2305736f9d46..3bcd4debbf41a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,7 +11,7 @@ from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import ArrayLike +from pandas._typing import ArrayLike, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -59,6 +59,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat import pandas.core.algorithms as algos +from pandas.core.array_algos.replace import compare_or_regex_search from pandas.core.array_algos.transforms import shift from pandas.core.arrays import ( Categorical, @@ -792,7 +793,6 @@ def _replace_list( self, src_list: List[Any], dest_list: List[Any], - masks: List[np.ndarray], inplace: bool = False, regex: bool = False, ) -> List["Block"]: @@ -801,11 +801,28 @@ def _replace_list( """ src_len = len(src_list) - 1 + def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: + """ + Generate a bool array by perform an equality check, or perform + an element-wise regular expression matching + """ + if isna(s): + return ~mask + + s = com.maybe_box_datetimelike(s) + return compare_or_regex_search(self.values, s, regex, mask) + + # Calculate the mask once, prior to the call of comp + # in order to avoid repeating the same computations + mask = ~isna(self.values) + + masks = [comp(s, mask, regex) for s in src_list] + rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(zip(src_list, dest_list)): new_rb: List["Block"] = [] for blk in rb: - m = masks[i][blk.mgr_locs.indexer] + m = masks[i] convert = i == src_len # only convert once at the end result = blk._replace_coerce( mask=m, @@ -2908,7 +2925,9 @@ def _extract_bool_array(mask: ArrayLike) -> np.ndarray: """ if isinstance(mask, ExtensionArray): # We could have BooleanArray, Sparse[bool], ... 
- mask = np.asarray(mask, dtype=np.bool_) + # Except for BooleanArray, this is equivalent to just + # np.asarray(mask, dtype=bool) + mask = mask.to_numpy(dtype=bool, na_value=False) assert isinstance(mask, np.ndarray), type(mask) assert mask.dtype == bool, mask.dtype diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 753b949f7c802..57a4a8c2ace8a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1,14 +1,11 @@ from collections import defaultdict import itertools -import operator -import re from typing import ( Any, DefaultDict, Dict, List, Optional, - Pattern, Sequence, Tuple, TypeVar, @@ -19,7 +16,7 @@ import numpy as np from pandas._libs import internals as libinternals, lib -from pandas._typing import ArrayLike, DtypeObj, Label, Scalar +from pandas._typing import ArrayLike, DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -29,12 +26,9 @@ ) from pandas.core.dtypes.common import ( DT64NS_DTYPE, - is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, is_list_like, - is_numeric_v_string_like, - is_scalar, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype @@ -44,7 +38,6 @@ import pandas.core.algorithms as algos from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject -import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import Index, ensure_index @@ -628,31 +621,10 @@ def replace_list( """ do a list replace """ inplace = validate_bool_kwarg(inplace, "inplace") - # figure out our mask apriori to avoid repeated replacements - values = self.as_array() - - def comp(s: Scalar, mask: np.ndarray, regex: bool = False): - """ - Generate a bool array by perform an equality check, or perform - an element-wise regular expression matching - """ - if isna(s): - return ~mask - - s = com.maybe_box_datetimelike(s) - return _compare_or_regex_search(values, s, regex, mask) - - # Calculate the mask once, prior to the call of comp - # in order to avoid repeating the same computations - mask = ~isna(values) - - masks = [comp(s, mask, regex) for s in src_list] - bm = self.apply( "_replace_list", src_list=src_list, dest_list=dest_list, - masks=masks, inplace=inplace, regex=regex, ) @@ -1900,80 +1872,6 @@ def _merge_blocks( return blocks -def _compare_or_regex_search( - a: ArrayLike, - b: Union[Scalar, Pattern], - regex: bool = False, - mask: Optional[ArrayLike] = None, -) -> Union[ArrayLike, bool]: - """ - Compare two array_like inputs of the same shape or two scalar values - - Calls operator.eq or re.search, depending on regex argument. If regex is - True, perform an element-wise regex matching. - - Parameters - ---------- - a : array_like - b : scalar or regex pattern - regex : bool, default False - mask : array_like or None (default) - - Returns - ------- - mask : array_like of bool - """ - - def _check_comparison_types( - result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern] - ): - """ - Raises an error if the two arrays (a,b) cannot be compared. - Otherwise, returns the comparison result as expected. 
- """ - if is_scalar(result) and isinstance(a, np.ndarray): - type_names = [type(a).__name__, type(b).__name__] - - if isinstance(a, np.ndarray): - type_names[0] = f"ndarray(dtype={a.dtype})" - - raise TypeError( - f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" - ) - - if not regex: - op = lambda x: operator.eq(x, b) - else: - op = np.vectorize( - lambda x: bool(re.search(b, x)) - if isinstance(x, str) and isinstance(b, (str, Pattern)) - else False - ) - - # GH#32621 use mask to avoid comparing to NAs - if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): - mask = np.reshape(~(isna(a)), a.shape) - if isinstance(a, np.ndarray): - a = a[mask] - - if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): - # GH#29553 avoid deprecation warnings from numpy - _check_comparison_types(False, a, b) - return False - - result = op(a) - - if isinstance(result, np.ndarray) and mask is not None: - # The shape of the mask can differ to that of the result - # since we may compare only a subset of a's or b's elements - tmp = np.zeros(mask.shape, dtype=np.bool_) - tmp[mask] = result - result = tmp - - _check_comparison_types(result, a, b) - return result - - def _fast_count_smallints(arr: np.ndarray) -> np.ndarray: """Faster version of set(arr) for sequences of small numbers.""" counts = np.bincount(arr.astype(np.int_)) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 83dfd42ae2a6e..ea2488dfc0877 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1131,8 +1131,19 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) - with pytest.raises(TypeError, match="Cannot compare types .+"): - df.replace({"asdf": "asdb", True: "yes"}) + result = df.replace({"asdf": "asdb", True: "yes"}) + expected = DataFrame({0: ["yes", False], 1: [False, "yes"]}) + tm.assert_frame_equal(result, expected) + + def test_replace_dict_strings_vs_ints(self): + # GH#34789 + df = pd.DataFrame({"Y0": [1, 2], "Y1": [3, 4]}) + result = df.replace({"replace_string": "test"}) + + tm.assert_frame_equal(result, df) + + result = df["Y0"].replace({"replace_string": "test"}) + tm.assert_series_equal(result, df["Y0"]) def test_replace_truthy(self): df = DataFrame({"a": [True, True]}) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index ccaa005369a1c..e255d46e81851 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -218,8 +218,9 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): s = pd.Series([True, False, True]) - with pytest.raises(TypeError, match="Cannot compare types .+"): - s.replace({"asdf": "asdb", True: "yes"}) + result = s.replace({"asdf": "asdb", True: "yes"}) + expected = pd.Series(["yes", False, "yes"]) + tm.assert_series_equal(result, expected) def test_replace2(self): N = 100 From 0569e29f3bfbb05cbd41ba3faae30571e7059ef8 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 4 Sep 2020 22:33:49 -0500 Subject: [PATCH 22/71] CLN: resolve UserWarning in `pandas/plotting/_matplotlib/core.py` #35945 (#35946) --- pandas/plotting/_matplotlib/core.py | 2 +- pandas/tests/plotting/test_frame.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py 
b/pandas/plotting/_matplotlib/core.py index c1ba7881165f1..f0b35e1cd2a74 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1223,8 +1223,8 @@ def get_label(i): if self._need_to_set_index: xticks = ax.get_xticks() xticklabels = [get_label(x) for x in xticks] - ax.set_xticklabels(xticklabels) ax.xaxis.set_major_locator(FixedLocator(xticks)) + ax.set_xticklabels(xticklabels) condition = ( not self._use_dynamic_x() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index ee43e5d7072fe..9ab697cb57690 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2796,10 +2796,12 @@ def test_table(self): _check_plot_works(df.plot, table=True) _check_plot_works(df.plot, table=df) - ax = df.plot() - assert len(ax.tables) == 0 - plotting.table(ax, df.T) - assert len(ax.tables) == 1 + # GH 35945 UserWarning + with tm.assert_produces_warning(None): + ax = df.plot() + assert len(ax.tables) == 0 + plotting.table(ax, df.T) + assert len(ax.tables) == 1 def test_errorbar_scatter(self): df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"]) From 7b2d4370c42187ba6be770824d041525d91968f8 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Sat, 5 Sep 2020 06:50:57 -0400 Subject: [PATCH 23/71] add note about missing values to Categorical docstring (#36125) --- pandas/core/arrays/categorical.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ec85ec47d625c..c3c9009dda659 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -280,6 +280,19 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): ['a', 'b', 'c', 'a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] + Missing values are not included as a category. + + >>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan]) + >>> c + [1, 2, 3, 1, 2, 3, NaN] + Categories (3, int64): [1, 2, 3] + + However, their presence is indicated in the `codes` attribute + by code `-1`. + + >>> c.codes + array([ 0, 1, 2, 0, 1, 2, -1], dtype=int8) + Ordered `Categoricals` can be sorted according to the custom order of the categories and can have a min and max value. 
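As a brief illustration of the docstring addition above (a hypothetical session, not part of the patch): because missing values are not stored as a category, the ``-1`` sentinel in ``codes`` agrees exactly with ``pd.isna`` on the Categorical:

>>> import numpy as np
>>> import pandas as pd
>>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan])
>>> ((c.codes == -1) == pd.isna(c)).all()
True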
From 7e906865ba6bd005c3a11ba199693b730749a8d0 Mon Sep 17 00:00:00 2001 From: Sarthak Vineet Kumar Date: Sat, 5 Sep 2020 18:09:11 +0530 Subject: [PATCH 24/71] CLN removing trailing commas (#36101) --- pandas/tests/io/test_sql.py | 3 --- pandas/tests/io/test_stata.py | 4 ++-- pandas/tests/plotting/test_frame.py | 4 ++-- pandas/tests/resample/test_datetime_index.py | 10 ++++------ .../tests/reshape/merge/test_merge_index_as_string.py | 4 ++-- pandas/tests/reshape/test_crosstab.py | 4 ++-- pandas/tests/reshape/test_get_dummies.py | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7e3162ed7b73..1edcc937f72c3 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2349,9 +2349,6 @@ def date_format(dt): def format_query(sql, *args): - """ - - """ processed_args = [] for arg in args: if isinstance(arg, float) and isna(arg): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 6d7fec803a8e0..88f61390957a6 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1153,7 +1153,7 @@ def test_read_chunks_117( from_frame = parsed.iloc[pos : pos + chunksize, :].copy() from_frame = self._convert_categorical(from_frame) tm.assert_frame_equal( - from_frame, chunk, check_dtype=False, check_datetimelike_compat=True, + from_frame, chunk, check_dtype=False, check_datetimelike_compat=True ) pos += chunksize @@ -1251,7 +1251,7 @@ def test_read_chunks_115( from_frame = parsed.iloc[pos : pos + chunksize, :].copy() from_frame = self._convert_categorical(from_frame) tm.assert_frame_equal( - from_frame, chunk, check_dtype=False, check_datetimelike_compat=True, + from_frame, chunk, check_dtype=False, check_datetimelike_compat=True ) pos += chunksize diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 9ab697cb57690..128a7bdb6730a 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1321,7 +1321,7 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): def test_plot_scatter_with_s(self): # this refers to GH 32904 - df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"],) + df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) @@ -1716,7 +1716,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100,)))) + df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e7637a598403f..59a0183304c76 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -124,7 +124,7 @@ def test_resample_integerarray(): result = ts.resample("3T").mean() expected = Series( - [1, 4, 7], index=pd.date_range("1/1/2000", periods=3, freq="3T"), dtype="Int64", + [1, 4, 7], index=pd.date_range("1/1/2000", periods=3, freq="3T"), dtype="Int64" ) tm.assert_series_equal(result, expected) @@ -764,7 +764,7 @@ def test_resample_origin(): @pytest.mark.parametrize( - "origin", ["invalid_value", "epch", "startday", "startt", "2000-30-30", object()], + "origin", ["invalid_value", 
"epch", "startday", "startt", "2000-30-30", object()] ) def test_resample_bad_origin(origin): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") @@ -777,9 +777,7 @@ def test_resample_bad_origin(origin): ts.resample("5min", origin=origin) -@pytest.mark.parametrize( - "offset", ["invalid_value", "12dayys", "2000-30-30", object()], -) +@pytest.mark.parametrize("offset", ["invalid_value", "12dayys", "2000-30-30", object()]) def test_resample_bad_offset(offset): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s") ts = Series(np.random.randn(len(rng)), index=rng) @@ -1595,7 +1593,7 @@ def test_downsample_dst_at_midnight(): "America/Havana", ambiguous=True ) dti = pd.DatetimeIndex(dti, freq="D") - expected = DataFrame([7.5, 28.0, 44.5], index=dti,) + expected = DataFrame([7.5, 28.0, 44.5], index=dti) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index 08614d04caf4b..d20d93370ec7e 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -29,7 +29,7 @@ def df2(): @pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) def left_df(request, df1): - """ Construct left test DataFrame with specified levels + """Construct left test DataFrame with specified levels (any of 'outer', 'inner', and 'v1') """ levels = request.param @@ -41,7 +41,7 @@ def left_df(request, df1): @pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) def right_df(request, df2): - """ Construct right test DataFrame with specified levels + """Construct right test DataFrame with specified levels (any of 'outer', 'inner', and 'v2') """ levels = request.param diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 6f5550a6f8209..1aadcfdc30f1b 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -354,7 +354,7 @@ def test_crosstab_normalize(self): crosstab(df.a, df.b, normalize="columns"), ) tm.assert_frame_equal( - crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index"), + crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index") ) row_normal_margins = DataFrame( @@ -377,7 +377,7 @@ def test_crosstab_normalize(self): crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins ) tm.assert_frame_equal( - crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins, + crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins ) tm.assert_frame_equal( crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index c003bfa6a239a..ce13762ea8f86 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -161,7 +161,7 @@ def test_get_dummies_unicode(self, sparse): s = [e, eacute, eacute] res = get_dummies(s, prefix="letter", sparse=sparse) exp = DataFrame( - {"letter_e": [1, 0, 0], f"letter_{eacute}": [0, 1, 1]}, dtype=np.uint8, + {"letter_e": [1, 0, 0], f"letter_{eacute}": [0, 1, 1]}, dtype=np.uint8 ) if sparse: exp = exp.apply(SparseArray, fill_value=0) From 0d2b936286475bb4aa24bc81c2418b7b999fd483 Mon Sep 17 00:00:00 2001 From: Thomas Dickson Date: Sat, 5 Sep 2020 15:44:26 +0100 Subject: [PATCH 25/71] Updated series documentation to close #35406 (#36139) --- pandas/core/series.py | 
6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9d84ce4b9ab2e..d8fdaa2a60252 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -164,9 +164,9 @@ class Series(base.IndexOpsMixin, generic.NDFrame): index : array-like or Index (1d) Values must be hashable and have the same length as `data`. Non-unique index values are allowed. Will default to - RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index - sequence are used, the index will override the keys found in the - dict. + RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like + and index is None, then the values in the index are used to + reindex the Series after it is created using the keys in the data. dtype : str, numpy.dtype, or ExtensionDtype, optional Data type for the output Series. If not specified, this will be inferred from `data`. From 2bcc15632b0f1faedef76318dfca4669eed70bfe Mon Sep 17 00:00:00 2001 From: joooeey Date: Sat, 5 Sep 2020 16:49:09 +0200 Subject: [PATCH 26/71] BUG: repair 'style' kwd handling in DataFrame.plot (#21003) (#33821) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/plotting/_matplotlib/core.py | 27 ++++++++++++++++----------- pandas/tests/plotting/test_frame.py | 18 ++++++++++++++++++ pandas/tests/plotting/test_series.py | 2 +- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8b28a4439e1da..39e53daf516c4 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -299,7 +299,7 @@ I/O Plotting ^^^^^^^^ -- +- Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) - Groupby/resample/rolling diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f0b35e1cd2a74..def4a1dc3f5c4 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,4 +1,3 @@ -import re from typing import TYPE_CHECKING, List, Optional, Tuple import warnings @@ -55,6 +54,15 @@ from matplotlib.axis import Axis +def _color_in_style(style: str) -> bool: + """ + Check if there is a color letter in the style string. + """ + from matplotlib.colors import BASE_COLORS + + return not set(BASE_COLORS).isdisjoint(style) + + class MPLPlot: """ Base class for assembling a pandas plot using matplotlib @@ -200,8 +208,6 @@ def __init__( self._validate_color_args() def _validate_color_args(self): - import matplotlib.colors - if ( "color" in self.kwds and self.nseries == 1 @@ -233,13 +239,12 @@ def _validate_color_args(self): styles = [self.style] # need only a single match for s in styles: - for char in s: - if char in matplotlib.colors.BASE_COLORS: - raise ValueError( - "Cannot pass 'style' string with a color symbol and " - "'color' keyword argument. Please use one or the other or " - "pass 'style' without a color symbol" - ) + if _color_in_style(s): + raise ValueError( + "Cannot pass 'style' string with a color symbol and " + "'color' keyword argument. 
Please use one or the "
+ "other or pass 'style' without a color symbol"
+ )
def _iter_data(self, data=None, keep_index=False, fillna=None):
if data is None:
@@ -739,7 +744,7 @@ def _apply_style_colors(self, colors, kwds, col_num, label):
style = self.style
has_color = "color" in kwds or self.colormap is not None
- nocolor_style = style is None or re.match("[a-z]+", style) is None
+ nocolor_style = style is None or not _color_in_style(style)
if (has_color or self.subplots) and nocolor_style:
if isinstance(colors, dict):
kwds["color"] = colors[label]
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 128a7bdb6730a..3b3902647390d 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -205,6 +205,24 @@ def test_color_and_style_arguments(self):
with pytest.raises(ValueError):
df.plot(color=["red", "black"], style=["k-", "r--"])
+ @pytest.mark.parametrize(
+ "color, expected",
+ [
+ ("green", ["green"] * 4),
+ (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]),
+ ],
+ )
+ def test_color_and_marker(self, color, expected):
+ # GH 21003
+ df = DataFrame(np.random.random((7, 4)))
+ ax = df.plot(color=color, style="d--")
+ # check colors
+ result = [i.get_color() for i in ax.lines]
+ assert result == expected
+ # check markers and linestyles
+ assert all(i.get_linestyle() == "--" for i in ax.lines)
+ assert all(i.get_marker() == "d" for i in ax.lines)
+
def test_nonnumeric_exclude(self):
df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]})
ax = df.plot()
diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py
index c296e2a6278c5..85c06b2e7b748 100644
--- a/pandas/tests/plotting/test_series.py
+++ b/pandas/tests/plotting/test_series.py
@@ -958,7 +958,7 @@ def test_plot_no_numeric_data(self):
def test_style_single_ok(self):
s = pd.Series([1, 2])
ax = s.plot(style="s", color="C3")
- assert ax.lines[0].get_color() == ["C3"]
+ assert ax.lines[0].get_color() == "C3"
@pytest.mark.parametrize(
"index_name, old_label, new_label",
From 55bdb16faf90e0e102b1ce45c234e240357c8005 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?=
Date: Sat, 5 Sep 2020 10:50:03 -0400
Subject: [PATCH 27/71] BUG/ENH: to_pickle/read_pickle support compression for file objects (#35736)

---
doc/source/whatsnew/v1.2.0.rst | 1 +
pandas/_typing.py | 4 ++--
pandas/core/frame.py | 4 ++--
pandas/io/common.py | 24 +++++++++--------------
pandas/io/formats/csvs.py | 15 ++++-----------
pandas/io/json/_json.py | 11 ++---------
pandas/io/parsers.py | 13 +++++--------
pandas/io/pickle.py | 10 ++--------
pandas/io/stata.py | 30 +++++------------------------
pandas/tests/io/test_pickle.py | 29 +++++++++++++++++++++++++++++
10 files changed, 61 insertions(+), 80 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 39e53daf516c4..b1229a5d5823d 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -295,6 +295,7 @@ I/O
- :meth:`to_csv` passes compression arguments for `'gzip'` always to `gzip.GzipFile` (:issue:`28103`)
- :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue: `35058`)
- :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`)
+- :meth:`to_pickle` and :meth:`read_pickle` did not support compression for file-objects
(:issue:`26237`, :issue:`29054`, and :issue:`29570`) Plotting ^^^^^^^^ diff --git a/pandas/_typing.py b/pandas/_typing.py index 74bfc9134c3af..b237013ac7805 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -116,7 +116,7 @@ # compression keywords and compression -CompressionDict = Mapping[str, Optional[Union[str, int, bool]]] +CompressionDict = Dict[str, Any] CompressionOptions = Optional[Union[str, CompressionDict]] @@ -138,6 +138,6 @@ class IOargs(Generic[ModeVar, EncodingVar]): filepath_or_buffer: FileOrBuffer encoding: EncodingVar - compression: CompressionOptions + compression: CompressionDict should_close: bool mode: Union[ModeVar, str] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c48bec9b670ad..1713743b98bff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -27,7 +27,6 @@ Iterable, Iterator, List, - Mapping, Optional, Sequence, Set, @@ -49,6 +48,7 @@ ArrayLike, Axes, Axis, + CompressionOptions, Dtype, FilePathOrBuffer, FrameOrSeriesUnion, @@ -2062,7 +2062,7 @@ def to_stata( variable_labels: Optional[Dict[Label, str]] = None, version: Optional[int] = 114, convert_strl: Optional[Sequence[Label]] = None, - compression: Union[str, Mapping[str, str], None] = "infer", + compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ) -> None: """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 2b13d54ec3aed..a80b89569f429 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -205,11 +205,13 @@ def get_filepath_or_buffer( """ filepath_or_buffer = stringify_path(filepath_or_buffer) + # handle compression dict + compression_method, compression = get_compression_method(compression) + compression_method = infer_compression(filepath_or_buffer, compression_method) + compression = dict(compression, method=compression_method) + # bz2 and xz do not write the byte order mark for utf-16 and utf-32 # print a warning when writing such files - compression_method = infer_compression( - filepath_or_buffer, get_compression_method(compression)[0] - ) if ( mode and "w" in mode @@ -238,7 +240,7 @@ def get_filepath_or_buffer( content_encoding = req.headers.get("Content-Encoding", None) if content_encoding == "gzip": # Override compression based on Content-Encoding header - compression = "gzip" + compression = {"method": "gzip"} reader = BytesIO(req.read()) req.close() return IOargs( @@ -374,11 +376,7 @@ def get_compression_method( if isinstance(compression, Mapping): compression_args = dict(compression) try: - # error: Incompatible types in assignment (expression has type - # "Union[str, int, None]", variable has type "Optional[str]") - compression_method = compression_args.pop( # type: ignore[assignment] - "method" - ) + compression_method = compression_args.pop("method") except KeyError as err: raise ValueError("If mapping, compression must have key 'method'") from err else: @@ -652,12 +650,8 @@ def __init__( super().__init__(file, mode, **kwargs_zip) # type: ignore[arg-type] def write(self, data): - archive_name = self.filename - if self.archive_name is not None: - archive_name = self.archive_name - if archive_name is None: - # ZipFile needs a non-empty string - archive_name = "zip" + # ZipFile needs a non-empty string + archive_name = self.archive_name or self.filename or "zip" super().writestr(archive_name, data) @property diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 270caec022fef..15cd5c026c6b6 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -21,12 +21,7 @@ ) from 
pandas.core.dtypes.missing import notna -from pandas.io.common import ( - get_compression_method, - get_filepath_or_buffer, - get_handle, - infer_compression, -) +from pandas.io.common import get_filepath_or_buffer, get_handle class CSVFormatter: @@ -60,17 +55,15 @@ def __init__( if path_or_buf is None: path_or_buf = StringIO() - # Extract compression mode as given, if dict - compression, self.compression_args = get_compression_method(compression) - self.compression = infer_compression(path_or_buf, compression) - ioargs = get_filepath_or_buffer( path_or_buf, encoding=encoding, - compression=self.compression, + compression=compression, mode=mode, storage_options=storage_options, ) + self.compression = ioargs.compression.pop("method") + self.compression_args = ioargs.compression self.path_or_buf = ioargs.filepath_or_buffer self.should_close = ioargs.should_close self.mode = ioargs.mode diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 7a3b76ff7e3d0..a4d923fdbe45a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -19,12 +19,7 @@ from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.reshape.concat import concat -from pandas.io.common import ( - get_compression_method, - get_filepath_or_buffer, - get_handle, - infer_compression, -) +from pandas.io.common import get_compression_method, get_filepath_or_buffer, get_handle from pandas.io.json._normalize import convert_to_line_delimits from pandas.io.json._table_schema import build_table_schema, parse_table_schema from pandas.io.parsers import _validate_integer @@ -66,6 +61,7 @@ def to_json( ) path_or_buf = ioargs.filepath_or_buffer should_close = ioargs.should_close + compression = ioargs.compression if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -616,9 +612,6 @@ def read_json( if encoding is None: encoding = "utf-8" - compression_method, compression = get_compression_method(compression) - compression_method = infer_compression(path_or_buf, compression_method) - compression = dict(compression, method=compression_method) ioargs = get_filepath_or_buffer( path_or_buf, encoding=encoding, diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c6ef5221e7ead..a0466c5ac6b57 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -63,12 +63,7 @@ from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import ( - get_filepath_or_buffer, - get_handle, - infer_compression, - validate_header_arg, -) +from pandas.io.common import get_filepath_or_buffer, get_handle, validate_header_arg from pandas.io.date_converters import generic_parser # BOM character (byte order mark) @@ -424,9 +419,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): if encoding is not None: encoding = re.sub("_", "-", encoding).lower() kwds["encoding"] = encoding - compression = kwds.get("compression", "infer") - compression = infer_compression(filepath_or_buffer, compression) # TODO: get_filepath_or_buffer could return # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] @@ -1976,6 +1969,10 @@ def __init__(self, src, **kwds): encoding = kwds.get("encoding") + # parsers.TextReader doesn't support compression dicts + if isinstance(kwds.get("compression"), dict): + kwds["compression"] = kwds["compression"]["method"] + if kwds.get("compression") is None and encoding: if isinstance(src, str): src = open(src, "rb") diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 
857a2d1b69be4..655deb5ca3779 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -92,11 +92,8 @@ def to_pickle( mode="wb", storage_options=storage_options, ) - compression = ioargs.compression - if not isinstance(ioargs.filepath_or_buffer, str) and compression == "infer": - compression = None f, fh = get_handle( - ioargs.filepath_or_buffer, "wb", compression=compression, is_text=False + ioargs.filepath_or_buffer, "wb", compression=ioargs.compression, is_text=False ) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL @@ -196,11 +193,8 @@ def read_pickle( ioargs = get_filepath_or_buffer( filepath_or_buffer, compression=compression, storage_options=storage_options ) - compression = ioargs.compression - if not isinstance(ioargs.filepath_or_buffer, str) and compression == "infer": - compression = None f, fh = get_handle( - ioargs.filepath_or_buffer, "rb", compression=compression, is_text=False + ioargs.filepath_or_buffer, "rb", compression=ioargs.compression, is_text=False ) # 1) try standard library Pickle diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 34d520004cc65..b3b16e04a5d9e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -16,18 +16,7 @@ from pathlib import Path import struct import sys -from typing import ( - Any, - AnyStr, - BinaryIO, - Dict, - List, - Mapping, - Optional, - Sequence, - Tuple, - Union, -) +from typing import Any, AnyStr, BinaryIO, Dict, List, Optional, Sequence, Tuple, Union import warnings from dateutil.relativedelta import relativedelta @@ -58,13 +47,7 @@ from pandas.core.indexes.base import Index from pandas.core.series import Series -from pandas.io.common import ( - get_compression_method, - get_filepath_or_buffer, - get_handle, - infer_compression, - stringify_path, -) +from pandas.io.common import get_filepath_or_buffer, get_handle, stringify_path _version_error = ( "Version of given Stata file is {version}. 
pandas supports importing " @@ -1976,9 +1959,6 @@ def _open_file_binary_write( return fname, False, None # type: ignore[return-value] elif isinstance(fname, (str, Path)): # Extract compression mode as given, if dict - compression_typ, compression_args = get_compression_method(compression) - compression_typ = infer_compression(fname, compression_typ) - compression = dict(compression_args, method=compression_typ) ioargs = get_filepath_or_buffer( fname, mode="wb", compression=compression, storage_options=storage_options ) @@ -2235,7 +2215,7 @@ def __init__( time_stamp: Optional[datetime.datetime] = None, data_label: Optional[str] = None, variable_labels: Optional[Dict[Label, str]] = None, - compression: Union[str, Mapping[str, str], None] = "infer", + compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): super().__init__() @@ -3118,7 +3098,7 @@ def __init__( data_label: Optional[str] = None, variable_labels: Optional[Dict[Label, str]] = None, convert_strl: Optional[Sequence[Label]] = None, - compression: Union[str, Mapping[str, str], None] = "infer", + compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): # Copy to new list since convert_strl might be modified later @@ -3523,7 +3503,7 @@ def __init__( variable_labels: Optional[Dict[Label, str]] = None, convert_strl: Optional[Sequence[Label]] = None, version: Optional[int] = None, - compression: Union[str, Mapping[str, str], None] = "infer", + compression: CompressionOptions = "infer", storage_options: StorageOptions = None, ): if version is None: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 6331113ab8945..d1c6705dd7a6f 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -14,7 +14,9 @@ import datetime import glob import gzip +import io import os +from pathlib import Path import pickle import shutil from warnings import catch_warnings, simplefilter @@ -486,3 +488,30 @@ def test_read_pickle_with_subclass(): tm.assert_series_equal(result[0], expected[0]) assert isinstance(result[1], MyTz) + + +def test_pickle_binary_object_compression(compression): + """ + Read/write from binary file-objects w/wo compression. 
+
+ GH 26237, GH 29054, and GH 29570
+ """
+ df = tm.makeDataFrame()
+
+ # reference for compression
+ with tm.ensure_clean() as path:
+ df.to_pickle(path, compression=compression)
+ reference = Path(path).read_bytes()
+
+ # write
+ buffer = io.BytesIO()
+ df.to_pickle(buffer, compression=compression)
+ buffer.seek(0)
+
+ # gzip and zip save the filename: cannot compare the compressed content
+ assert buffer.getvalue() == reference or compression in ("gzip", "zip")
+
+ # read
+ read_df = pd.read_pickle(buffer, compression=compression)
+ buffer.seek(0)
+ tm.assert_frame_equal(df, read_df)
From 40008d01bcd2f4e5c7b456afa90f1b661f863831 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Sat, 5 Sep 2020 15:55:11 +0100
Subject: [PATCH 28/71] TYP: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias (#36137)

---
ci/code_checks.sh | 8 ++++++++
pandas/core/apply.py | 16 +++++++--------
pandas/core/groupby/generic.py | 10 +++++-----
pandas/core/groupby/grouper.py | 2 +-
pandas/core/reshape/merge.py | 10 +++++-----
pandas/core/reshape/pivot.py | 4 ++--
pandas/io/pytables.py | 6 +++---
7 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6006d09bc3e78..8ee579cd25203 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -230,6 +230,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
invgrep -R --include=*.{py,pyx} '!r}' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
+ # -------------------------------------------------------------------------
+ # Type annotations
+
MSG='Check for use of comment-based annotation syntax' ; echo $MSG
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -238,6 +241,11 @@
invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
+ MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG
+ invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas
+ RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+ # -------------------------------------------------------------------------
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 99a9e1377563c..bbf832f33065b 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -1,12 +1,12 @@
import abc
import inspect
-from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type
import numpy as np
from pandas._config import option_context
-from pandas._typing import Axis
+from pandas._typing import Axis, FrameOrSeriesUnion
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.common import is_dict_like, is_list_like, is_sequence
@@ -73,7 +73,7 @@ def series_generator(self) -> Iterator["Series"]:
@abc.abstractmethod
def wrap_results_for_axis(
self, results: ResType, res_index: "Index"
- ) -> Union["Series", "DataFrame"]:
+ ) -> FrameOrSeriesUnion:
pass
# ---------------------------------------------------------------
@@ -289,9 +289,7 @@ def apply_series_generator(self) -> Tuple[ResType, "Index"]:
return results, res_index
- def wrap_results(
- self, results: ResType, res_index: "Index"
- ) -> Union["Series", "DataFrame"]:
+ def
wrap_results(self, results: ResType, res_index: "Index") -> FrameOrSeriesUnion: from pandas import Series # see if we can infer the results @@ -335,7 +333,7 @@ def result_columns(self) -> "Index": def wrap_results_for_axis( self, results: ResType, res_index: "Index" - ) -> Union["Series", "DataFrame"]: + ) -> FrameOrSeriesUnion: """ return the results for the rows """ if self.result_type == "reduce": @@ -408,9 +406,9 @@ def result_columns(self) -> "Index": def wrap_results_for_axis( self, results: ResType, res_index: "Index" - ) -> Union["Series", "DataFrame"]: + ) -> FrameOrSeriesUnion: """ return the results for the columns """ - result: Union["Series", "DataFrame"] + result: FrameOrSeriesUnion # we have requested to expand if self.result_type == "expand": diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9fda3c7a6b354..728e1ee4653fd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -325,7 +325,7 @@ def _aggregate_multiple_funcs(self, arg): arg = zip(columns, arg) - results: Dict[base.OutputKey, Union[Series, DataFrame]] = {} + results: Dict[base.OutputKey, FrameOrSeriesUnion] = {} for idx, (name, func) in enumerate(arg): obj = self @@ -349,7 +349,7 @@ def _wrap_series_output( self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Optional[Index], - ) -> Union[Series, DataFrame]: + ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -372,7 +372,7 @@ def _wrap_series_output( indexed_output = {key.position: val for key, val in output.items()} columns = Index(key.label for key in output) - result: Union[Series, DataFrame] + result: FrameOrSeriesUnion if len(output) > 1: result = self.obj._constructor_expanddim(indexed_output, index=index) result.columns = columns @@ -390,7 +390,7 @@ def _wrap_aggregated_output( self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Optional[Index], - ) -> Union[Series, DataFrame]: + ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. 
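For reference, the alias these hunks switch to lives in pandas/_typing.py; below is a rough sketch of its definition and of a made-up call site (`summarize` is illustrative and not from the patch; the exact definition in _typing.py may differ slightly):

    from typing import TYPE_CHECKING, Union

    if TYPE_CHECKING:
        # imported only for type checking, avoiding a circular import at runtime
        from pandas import DataFrame, Series

    # one name for "either a Series or a DataFrame"
    FrameOrSeriesUnion = Union["DataFrame", "Series"]

    def summarize(obj: "FrameOrSeriesUnion") -> "FrameOrSeriesUnion":
        """A function that accepts and returns either container."""
        ...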
@@ -1122,7 +1122,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike: raise # We get here with a) EADtypes and b) object dtype - obj: Union[Series, DataFrame] + obj: FrameOrSeriesUnion # call our grouper again with only this block if isinstance(bvalues, ExtensionArray): # TODO(EA2D): special case not needed with 2D EAs diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 6678edc3821c8..59ea7781025c4 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -393,7 +393,7 @@ class Grouping: ---------- index : Index grouper : - obj Union[DataFrame, Series]: + obj : DataFrame or Series name : Label level : observed : bool, default False diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 602ff226f8878..f1c5486222ea1 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -6,14 +6,14 @@ import datetime from functools import partial import string -from typing import TYPE_CHECKING, Optional, Tuple, Union +from typing import TYPE_CHECKING, Optional, Tuple import warnings import numpy as np from pandas._libs import Timedelta, hashtable as libhashtable, lib import pandas._libs.join as libjoin -from pandas._typing import ArrayLike, FrameOrSeries +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -51,7 +51,7 @@ from pandas.core.sorting import is_int64_overflow_possible if TYPE_CHECKING: - from pandas import DataFrame, Series # noqa:F401 + from pandas import DataFrame # noqa:F401 @Substitution("\nleft : DataFrame") @@ -575,8 +575,8 @@ class _MergeOperation: def __init__( self, - left: Union["Series", "DataFrame"], - right: Union["Series", "DataFrame"], + left: FrameOrSeriesUnion, + right: FrameOrSeriesUnion, how: str = "inner", on=None, left_on=None, diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 969ac56e41860..842a42f80e1b7 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -12,7 +12,7 @@ import numpy as np -from pandas._typing import Label +from pandas._typing import FrameOrSeriesUnion, Label from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -200,7 +200,7 @@ def pivot_table( def _add_margins( - table: Union["Series", "DataFrame"], + table: FrameOrSeriesUnion, data, values, rows, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0913627324c48..e850a101a0a63 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -16,7 +16,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, FrameOrSeries, Label +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion, Label from pandas.compat._optional import import_optional_dependency from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning @@ -2566,7 +2566,7 @@ class Fixed: pandas_kind: str format_type: str = "fixed" # GH#30962 needed by dask - obj_type: Type[Union[DataFrame, Series]] + obj_type: Type[FrameOrSeriesUnion] ndim: int encoding: str parent: HDFStore @@ -4442,7 +4442,7 @@ class AppendableFrameTable(AppendableTable): pandas_kind = "frame_table" table_type = "appendable_frame" ndim = 2 - obj_type: Type[Union[DataFrame, Series]] = DataFrame + obj_type: Type[FrameOrSeriesUnion] = DataFrame @property def is_transposed(self) -> bool: From 
6f2ca9231621603d13b1bfcd592e860a6179fb81 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 5 Sep 2020 16:03:54 +0100 Subject: [PATCH 29/71] TYP: remove string literals for type annotations in pandas\core\frame.py (#36140) --- pandas/core/frame.py | 104 +++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1713743b98bff..29d6fb9aa7d56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -420,7 +420,7 @@ class DataFrame(NDFrame): _typ = "dataframe" @property - def _constructor(self) -> Type["DataFrame"]: + def _constructor(self) -> Type[DataFrame]: return DataFrame _constructor_sliced: Type[Series] = Series @@ -1233,7 +1233,7 @@ def __rmatmul__(self, other): # IO methods (to / from other formats) @classmethod - def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFrame": + def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> DataFrame: """ Construct DataFrame from dict of array-like or dicts. @@ -1671,7 +1671,7 @@ def from_records( columns=None, coerce_float=False, nrows=None, - ) -> "DataFrame": + ) -> DataFrame: """ Convert structured or record ndarray to DataFrame. @@ -2012,7 +2012,7 @@ def _from_arrays( index, dtype: Optional[Dtype] = None, verify_integrity: bool = True, - ) -> "DataFrame": + ) -> DataFrame: """ Create DataFrame from a list of arrays corresponding to the columns. @@ -2720,7 +2720,7 @@ def memory_usage(self, index=True, deep=False) -> Series: ).append(result) return result - def transpose(self, *args, copy: bool = False) -> "DataFrame": + def transpose(self, *args, copy: bool = False) -> DataFrame: """ Transpose index and columns. @@ -2843,7 +2843,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": return result.__finalize__(self, method="transpose") @property - def T(self) -> "DataFrame": + def T(self) -> DataFrame: return self.transpose() # ---------------------------------------------------------------------- @@ -3503,7 +3503,7 @@ def eval(self, expr, inplace=False, **kwargs): return _eval(expr, inplace=inplace, **kwargs) - def select_dtypes(self, include=None, exclude=None) -> "DataFrame": + def select_dtypes(self, include=None, exclude=None) -> DataFrame: """ Return a subset of the DataFrame's columns based on the column dtypes. @@ -3667,7 +3667,7 @@ def insert(self, loc, column, value, allow_duplicates=False) -> None: value = self._sanitize_column(column, value, broadcast=False) self._mgr.insert(loc, column, value, allow_duplicates=allow_duplicates) - def assign(self, **kwargs) -> "DataFrame": + def assign(self, **kwargs) -> DataFrame: r""" Assign new columns to a DataFrame. @@ -3965,7 +3965,7 @@ def _reindex_columns( allow_dups=False, ) - def _reindex_multi(self, axes, copy, fill_value) -> "DataFrame": + def _reindex_multi(self, axes, copy, fill_value) -> DataFrame: """ We are guaranteed non-Nones in the axes. 
""" @@ -3998,7 +3998,7 @@ def align( limit=None, fill_axis=0, broadcast_axis=None, - ) -> "DataFrame": + ) -> DataFrame: return super().align( other, join=join, @@ -4067,7 +4067,7 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): ("tolerance", None), ], ) - def reindex(self, *args, **kwargs) -> "DataFrame": + def reindex(self, *args, **kwargs) -> DataFrame: axes = validate_axis_style_args(self, args, kwargs, "labels", "reindex") kwargs.update(axes) # Pop these, since the values are in `kwargs` under different names @@ -4229,7 +4229,7 @@ def rename( inplace: bool = False, level: Optional[Level] = None, errors: str = "ignore", - ) -> Optional["DataFrame"]: + ) -> Optional[DataFrame]: """ Alter axes labels. @@ -4357,7 +4357,7 @@ def fillna( inplace=False, limit=None, downcast=None, - ) -> Optional["DataFrame"]: + ) -> Optional[DataFrame]: return super().fillna( value=value, method=method, @@ -4465,7 +4465,7 @@ def _replace_columnwise( return res.__finalize__(self) @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) - def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame": + def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> DataFrame: return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value ) @@ -4666,7 +4666,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Label = "", - ) -> Optional["DataFrame"]: + ) -> Optional[DataFrame]: """ Reset the index, or a level of it. @@ -4910,20 +4910,20 @@ def _maybe_casted_values(index, labels=None): # Reindex-based selection methods @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) - def isna(self) -> "DataFrame": + def isna(self) -> DataFrame: result = self._constructor(self._data.isna(func=isna)) return result.__finalize__(self, method="isna") @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) - def isnull(self) -> "DataFrame": + def isnull(self) -> DataFrame: return self.isna() @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) - def notna(self) -> "DataFrame": + def notna(self) -> DataFrame: return ~self.isna() @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) - def notnull(self) -> "DataFrame": + def notnull(self) -> DataFrame: return ~self.isna() def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): @@ -5074,7 +5074,7 @@ def drop_duplicates( keep: Union[str, bool] = "first", inplace: bool = False, ignore_index: bool = False, - ) -> Optional["DataFrame"]: + ) -> Optional[DataFrame]: """ Return DataFrame with duplicate rows removed. @@ -5168,7 +5168,7 @@ def duplicated( self, subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, keep: Union[str, bool] = "first", - ) -> "Series": + ) -> Series: """ Return boolean Series denoting duplicate rows. @@ -5619,7 +5619,7 @@ def value_counts( return counts - def nlargest(self, n, columns, keep="first") -> "DataFrame": + def nlargest(self, n, columns, keep="first") -> DataFrame: """ Return the first `n` rows ordered by `columns` in descending order. @@ -5728,7 +5728,7 @@ def nlargest(self, n, columns, keep="first") -> "DataFrame": """ return algorithms.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest() - def nsmallest(self, n, columns, keep="first") -> "DataFrame": + def nsmallest(self, n, columns, keep="first") -> DataFrame: """ Return the first `n` rows ordered by `columns` in ascending order. 
@@ -5830,7 +5830,7 @@ def nsmallest(self, n, columns, keep="first") -> "DataFrame": self, n=n, keep=keep, columns=columns ).nsmallest() - def swaplevel(self, i=-2, j=-1, axis=0) -> "DataFrame": + def swaplevel(self, i=-2, j=-1, axis=0) -> DataFrame: """ Swap levels i and j in a MultiIndex on a particular axis. @@ -5861,7 +5861,7 @@ def swaplevel(self, i=-2, j=-1, axis=0) -> "DataFrame": result.columns = result.columns.swaplevel(i, j) return result - def reorder_levels(self, order, axis=0) -> "DataFrame": + def reorder_levels(self, order, axis=0) -> DataFrame: """ Rearrange index levels using input order. May not drop or duplicate levels. @@ -5894,7 +5894,7 @@ def reorder_levels(self, order, axis=0) -> "DataFrame": # ---------------------------------------------------------------------- # Arithmetic / combination related - def _combine_frame(self, other: "DataFrame", func, fill_value=None): + def _combine_frame(self, other: DataFrame, func, fill_value=None): # at this point we have `self._indexed_same(other)` if fill_value is None: @@ -5914,7 +5914,7 @@ def _arith_op(left, right): new_data = ops.dispatch_to_series(self, other, _arith_op) return new_data - def _construct_result(self, result) -> "DataFrame": + def _construct_result(self, result) -> DataFrame: """ Wrap the result of an arithmetic, comparison, or logical operation. @@ -6031,11 +6031,11 @@ def _construct_result(self, result) -> "DataFrame": @Appender(_shared_docs["compare"] % _shared_doc_kwargs) def compare( self, - other: "DataFrame", + other: DataFrame, align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, - ) -> "DataFrame": + ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, @@ -6044,8 +6044,8 @@ def compare( ) def combine( - self, other: "DataFrame", func, fill_value=None, overwrite=True - ) -> "DataFrame": + self, other: DataFrame, func, fill_value=None, overwrite=True + ) -> DataFrame: """ Perform column-wise combine with another DataFrame. @@ -6212,7 +6212,7 @@ def combine( # convert_objects just in case return self._constructor(result, index=new_index, columns=new_columns) - def combine_first(self, other: "DataFrame") -> "DataFrame": + def combine_first(self, other: DataFrame) -> DataFrame: """ Update null elements with value in the same location in `other`. @@ -6718,7 +6718,7 @@ def groupby( @Substitution("") @Appender(_shared_docs["pivot"]) - def pivot(self, index=None, columns=None, values=None) -> "DataFrame": + def pivot(self, index=None, columns=None, values=None) -> DataFrame: from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) @@ -6870,7 +6870,7 @@ def pivot_table( dropna=True, margins_name="All", observed=False, - ) -> "DataFrame": + ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table return pivot_table( @@ -7056,7 +7056,7 @@ def stack(self, level=-1, dropna=True): def explode( self, column: Union[str, Tuple], ignore_index: bool = False - ) -> "DataFrame": + ) -> DataFrame: """ Transform each element of a list-like to a row, replicating index values. 
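A quick sketch of the explode behavior summarized above (independent of the annotation change in this patch):

    import pandas as pd

    df = pd.DataFrame({"A": [[1, 2], [3]], "B": ["x", "y"]})
    df.explode("A")
    # each list element in "A" becomes its own row, with the index
    # value and the "B" value repeated:
    #    A  B
    # 0  1  x
    # 0  2  x
    # 1  3  y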
@@ -7211,7 +7211,7 @@ def melt( value_name="value", col_level=None, ignore_index=True, - ) -> "DataFrame": + ) -> DataFrame: return melt( self, @@ -7299,7 +7299,7 @@ def melt( 1 255.0""" ), ) - def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": + def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: bm_axis = self._get_block_manager_axis(axis) self._consolidate_inplace() @@ -7462,7 +7462,7 @@ def _aggregate(self, arg, axis=0, *args, **kwargs): klass=_shared_doc_kwargs["klass"], axis=_shared_doc_kwargs["axis"], ) - def transform(self, func, axis=0, *args, **kwargs) -> "DataFrame": + def transform(self, func, axis=0, *args, **kwargs) -> DataFrame: axis = self._get_axis_number(axis) if axis == 1: return self.T.transform(func, *args, **kwargs).T @@ -7616,7 +7616,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): ) return op.get_result() - def applymap(self, func) -> "DataFrame": + def applymap(self, func) -> DataFrame: """ Apply a function to a Dataframe elementwise. @@ -7678,7 +7678,7 @@ def infer(x): def append( self, other, ignore_index=False, verify_integrity=False, sort=False - ) -> "DataFrame": + ) -> DataFrame: """ Append rows of `other` to the end of caller, returning a new object. @@ -7818,7 +7818,7 @@ def append( def join( self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False - ) -> "DataFrame": + ) -> DataFrame: """ Join columns of another DataFrame. @@ -8009,7 +8009,7 @@ def merge( copy=True, indicator=False, validate=None, - ) -> "DataFrame": + ) -> DataFrame: from pandas.core.reshape.merge import merge return merge( @@ -8028,7 +8028,7 @@ def merge( validate=validate, ) - def round(self, decimals=0, *args, **kwargs) -> "DataFrame": + def round(self, decimals=0, *args, **kwargs) -> DataFrame: """ Round a DataFrame to a variable number of decimal places. @@ -8142,7 +8142,7 @@ def _series_round(s, decimals): # ---------------------------------------------------------------------- # Statistical methods, etc. - def corr(self, method="pearson", min_periods=1) -> "DataFrame": + def corr(self, method="pearson", min_periods=1) -> DataFrame: """ Compute pairwise correlation of columns, excluding NA/null values. @@ -8233,7 +8233,7 @@ def corr(self, method="pearson", min_periods=1) -> "DataFrame": def cov( self, min_periods: Optional[int] = None, ddof: Optional[int] = 1 - ) -> "DataFrame": + ) -> DataFrame: """ Compute pairwise covariance of columns, excluding NA/null values. @@ -8636,7 +8636,7 @@ def func(values): else: return op(values, axis=axis, skipna=skipna, **kwds) - def _get_data(axis_matters: bool) -> "DataFrame": + def _get_data(axis_matters: bool) -> DataFrame: if filter_type is None: data = self._get_numeric_data() elif filter_type == "bool": @@ -8937,7 +8937,7 @@ def _get_agg_axis(self, axis_num: int) -> Index: else: raise ValueError(f"Axis must be 0 or 1 (got {repr(axis_num)})") - def mode(self, axis=0, numeric_only=False, dropna=True) -> "DataFrame": + def mode(self, axis=0, numeric_only=False, dropna=True) -> DataFrame: """ Get the mode(s) of each element along the selected axis. @@ -9122,7 +9122,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): def to_timestamp( self, freq=None, how: str = "start", axis: Axis = 0, copy: bool = True - ) -> "DataFrame": + ) -> DataFrame: """ Cast to DatetimeIndex of timestamps, at *beginning* of period. 
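The unquoted forward references in these hunks only work because the annotations are never evaluated at definition time; the enabling mechanism is outside the hunks shown here. A minimal sketch of the idea, assuming PEP 563 postponed evaluation via the __future__ import:

    from __future__ import annotations  # annotations are stored as strings, not evaluated

    from typing import Type

    class DataFrame:
        @property
        def _constructor(self) -> Type[DataFrame]:
            # without postponed evaluation, DataFrame is not yet bound while the
            # class body executes, so this unquoted annotation would raise NameError
            return DataFrame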
@@ -9151,7 +9151,7 @@ def to_timestamp( setattr(new_obj, axis_name, new_ax) return new_obj - def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> "DataFrame": + def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> DataFrame: """ Convert DataFrame from DatetimeIndex to PeriodIndex. @@ -9180,7 +9180,7 @@ def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> "DataFrame" setattr(new_obj, axis_name, new_ax) return new_obj - def isin(self, values) -> "DataFrame": + def isin(self, values) -> DataFrame: """ Whether each element in the DataFrame is contained in values. @@ -9287,10 +9287,10 @@ def isin(self, values) -> "DataFrame": _info_axis_number = 1 _info_axis_name = "columns" - index: "Index" = properties.AxisProperty( + index: Index = properties.AxisProperty( axis=1, doc="The index (row labels) of the DataFrame." ) - columns: "Index" = properties.AxisProperty( + columns: Index = properties.AxisProperty( axis=0, doc="The column labels of the DataFrame." ) From e7668950713670b86f0ac024147908376d3b79be Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 5 Sep 2020 10:53:21 -0700 Subject: [PATCH 30/71] STY+CI: check for private function access across modules (#36144) --- Makefile | 7 +++ ci/code_checks.sh | 8 ++++ pandas/_libs/algos.pyx | 14 +++--- pandas/core/algorithms.py | 8 ++-- pandas/core/arrays/sparse/array.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/missing.py | 2 +- pandas/plotting/_matplotlib/compat.py | 10 ++-- pandas/plotting/_matplotlib/core.py | 4 +- pandas/plotting/_matplotlib/tools.py | 2 +- pandas/tests/plotting/common.py | 8 ++-- pandas/tests/plotting/test_frame.py | 4 +- pandas/tests/plotting/test_misc.py | 4 +- scripts/validate_unwanted_patterns.py | 69 +++++++++++++++++++++++++-- 14 files changed, 111 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index f26689ab65ba5..4a9a48992f92f 100644 --- a/Makefile +++ b/Makefile @@ -25,3 +25,10 @@ doc: cd doc; \ python make.py clean; \ python make.py html + +check: + python3 scripts/validate_unwanted_patterns.py \ + --validation-type="private_function_across_module" \ + --included-file-extensions="py" \ + --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \ + pandas/ diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8ee579cd25203..875f1dbb83ce3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then fi RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for use of private module attribute access' ; echo $MSG + if [[ "$GITHUB_ACTIONS" == "true" ]]; then + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/ + else + $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/ + fi + RET=$(($RET + $?)) ; echo $MSG "DONE" + echo "isort --version-number" isort --version-number diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 0a70afda893cf..c4723a5f064c7 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -412,7 +412,7 @@ ctypedef fused algos_t: uint8_t -def _validate_limit(nobs: int, limit=None) -> int: +def validate_limit(nobs: int, limit=None) -> int: """ Check that the `limit` argument is a positive 
integer. @@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): indexer = np.empty(nright, dtype=np.int64) indexer[:] = -1 - lim = _validate_limit(nright, limit) + lim = validate_limit(nright, limit) if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: return indexer @@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): if N == 0: return - lim = _validate_limit(N, limit) + lim = validate_limit(N, limit) val = values[0] for i in range(N): @@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None): if N == 0: return - lim = _validate_limit(N, limit) + lim = validate_limit(N, limit) for j in range(K): fill_count = 0 @@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: indexer = np.empty(nright, dtype=np.int64) indexer[:] = -1 - lim = _validate_limit(nright, limit) + lim = validate_limit(nright, limit) if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: return indexer @@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): if N == 0: return - lim = _validate_limit(N, limit) + lim = validate_limit(N, limit) val = values[N - 1] for i in range(N - 1, -1, -1): @@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values, if N == 0: return - lim = _validate_limit(N, limit) + lim = validate_limit(N, limit) for j in range(K): fill_count = 0 diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f297c7165208f..50ec3714f454b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -60,7 +60,7 @@ from pandas.core.indexers import validate_indices if TYPE_CHECKING: - from pandas import Categorical, DataFrame, Series + from pandas import Categorical, DataFrame, Series # noqa:F401 _shared_docs: Dict[str, str] = {} @@ -767,7 +767,7 @@ def value_counts( counts = result._values else: - keys, counts = _value_counts_arraylike(values, dropna) + keys, counts = value_counts_arraylike(values, dropna) result = Series(counts, index=keys, name=name) @@ -780,8 +780,8 @@ def value_counts( return result -# Called once from SparseArray -def _value_counts_arraylike(values, dropna: bool): +# Called once from SparseArray, otherwise could be private +def value_counts_arraylike(values, dropna: bool): """ Parameters ---------- diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 1531f7b292365..47c960dc969d6 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -735,7 +735,7 @@ def value_counts(self, dropna=True): """ from pandas import Index, Series - keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna) + keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) fcounts = self.sp_index.ngaps if fcounts > 0: if self._null_fill_value and dropna: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3bcd4debbf41a..9f4e535dc787d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -390,7 +390,7 @@ def fillna( mask = isna(self.values) if limit is not None: - limit = libalgos._validate_limit(None, limit=limit) + limit = libalgos.validate_limit(None, limit=limit) mask[mask.cumsum(self.ndim - 1) > limit] = False if not self._can_hold_na: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7802c5cbdbfb3..be66b19d10064 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -228,7 +228,7 @@ def 
interpolate_1d( ) # default limit is unlimited GH #16282 - limit = algos._validate_limit(nobs=None, limit=limit) + limit = algos.validate_limit(nobs=None, limit=limit) # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py index 7f107f18eca25..964596d9b6319 100644 --- a/pandas/plotting/_matplotlib/compat.py +++ b/pandas/plotting/_matplotlib/compat.py @@ -17,8 +17,8 @@ def inner(): return inner -_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge) -_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge) -_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) -_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) -_mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) +mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge) +mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge) +mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) +mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) +mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index def4a1dc3f5c4..8275c0991e464 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -29,7 +29,7 @@ import pandas.core.common as com from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 +from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0 from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters from pandas.plotting._matplotlib.style import get_standard_colors from pandas.plotting._matplotlib.timeseries import ( @@ -944,7 +944,7 @@ def _plot_colorbar(self, ax: "Axes", **kwds): img = ax.collections[-1] cbar = self.fig.colorbar(img, ax=ax, **kwds) - if _mpl_ge_3_0_0(): + if mpl_ge_3_0_0(): # The workaround below is no longer necessary. 
return diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 98aaab6838fba..c5b44f37150bb 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -307,7 +307,7 @@ def handle_shared_axes( sharey: bool, ): if nplots > 1: - if compat._mpl_ge_3_2_0(): + if compat.mpl_ge_3_2_0(): row_num = lambda x: x.get_subplotspec().rowspan.start col_num = lambda x: x.get_subplotspec().colspan.start else: diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index b753c96af6290..9301a29933d45 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -28,10 +28,10 @@ def setup_method(self, method): mpl.rcdefaults() - self.mpl_ge_2_2_3 = compat._mpl_ge_2_2_3() - self.mpl_ge_3_0_0 = compat._mpl_ge_3_0_0() - self.mpl_ge_3_1_0 = compat._mpl_ge_3_1_0() - self.mpl_ge_3_2_0 = compat._mpl_ge_3_2_0() + self.mpl_ge_2_2_3 = compat.mpl_ge_2_2_3() + self.mpl_ge_3_0_0 = compat.mpl_ge_3_0_0() + self.mpl_ge_3_1_0 = compat.mpl_ge_3_1_0() + self.mpl_ge_3_2_0 = compat.mpl_ge_3_2_0() self.bp_n_objects = 7 self.polycollection_factor = 2 diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 3b3902647390d..d2b22c7a4c2e3 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -51,7 +51,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): @pytest.mark.xfail(reason="Waiting for PR 34334", strict=True) @pytest.mark.slow def test_plot(self): - from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0 + from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0 df = self.tdf _check_plot_works(df.plot, grid=False) @@ -69,7 +69,7 @@ def test_plot(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) df = DataFrame({"x": [1, 2], "y": [3, 4]}) - if _mpl_ge_3_1_0(): + if mpl_ge_3_1_0(): msg = "'Line2D' object has no property 'blarg'" else: msg = "Unknown property blarg" diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 130acaa8bcd58..0208ab3e0225b 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -96,7 +96,7 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): @td.skip_if_no_scipy def test_scatter_matrix_axis(self): - from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 + from pandas.plotting._matplotlib.compat import mpl_ge_3_0_0 scatter_matrix = plotting.scatter_matrix @@ -105,7 +105,7 @@ def test_scatter_matrix_axis(self): # we are plotting multiples on a sub-plot with tm.assert_produces_warning( - UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0() + UserWarning, raise_on_extra_warnings=mpl_ge_3_0_0() ): axes = _check_plot_works( scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1 diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 193fef026a96b..1a6d8cc8b9914 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -16,9 +16,7 @@ import sys import token import tokenize -from typing import IO, Callable, FrozenSet, Iterable, List, Tuple - -PATHS_TO_IGNORE: Tuple[str, ...] 
= ("asv_bench/env",)
+from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
 
 
 def _get_literal_string_prefix_len(token_string: str) -> int:
@@ -114,6 +112,58 @@ def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
         )
 
 
+PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"}  # no known alternative
+
+
+def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Checking that a private function is not used across modules.
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+    Yields
+    ------
+    line_number : int
+        Line number of the private function that is used across modules.
+    msg : str
+        Explanation of the error.
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    imported_modules: Set[str] = set()
+
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.Import, ast.ImportFrom)):
+            for module in node.names:
+                module_fqdn = module.name if module.asname is None else module.asname
+                imported_modules.add(module_fqdn)
+
+        if not isinstance(node, ast.Call):
+            continue
+
+        try:
+            module_name = node.func.value.id
+            function_name = node.func.attr
+        except AttributeError:
+            continue
+
+        # Exception section #
+
+        # (Debatable) Class case
+        if module_name[0].isupper():
+            continue
+        # (Debatable) Dunder methods case
+        elif function_name.startswith("__") and function_name.endswith("__"):
+            continue
+        elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
+            continue
+
+        if module_name in imported_modules and function_name.startswith("_"):
+            yield (node.lineno, f"Private function '{module_name}.{function_name}'")
+
+
 def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
     """
     This test case is necessary after 'Black' (https://github.com/psf/black),
@@ -293,6 +343,7 @@ def main(
     source_path: str,
     output_format: str,
     file_extensions_to_check: str,
+    excluded_file_paths: str,
 ) -> bool:
     """
     Main entry point of the script.
@@ -305,6 +356,10 @@ def main(
         Source path representing path to a file/directory.
     output_format : str
         Output format of the error message.
+    file_extensions_to_check : str
+        Comma separated values of what file extensions to check.
+    excluded_file_paths : str
+        Comma separated values of what file paths to exclude during the check.
 
     Returns
     -------
@@ -325,6 +380,7 @@ def main(
     FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
         file_extensions_to_check.split(",")
     )
+    PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))
 
     if os.path.isfile(source_path):
         file_path = source_path
@@ -362,6 +418,7 @@ def main(
 if __name__ == "__main__":
     available_validation_types: List[str] = [
         "bare_pytest_raises",
+        "private_function_across_module",
         "strings_to_concatenate",
         "strings_with_wrong_placed_whitespace",
     ]
@@ -389,6 +446,11 @@ def main(
         default="py,pyx,pxd,pxi",
         help="Comma separated file extensions to check.",
     )
+    parser.add_argument(
+        "--excluded-file-paths",
+        default="asv_bench/env",
+        help="Comma separated file paths to exclude during the check.",
+    )
 
     args = parser.parse_args()
 
@@ -398,5 +460,6 @@ def main(
             source_path=args.path,
             output_format=args.format,
             file_extensions_to_check=args.included_file_extensions,
+            excluded_file_paths=args.excluded_file_paths,
         )
     )
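The effect of the new check, sketched against the rename this patch performs (an illustration, not part of the patch; value_counts_arraylike is an internal helper, used here only to show what the checker flags):

    import numpy as np

    import pandas.core.algorithms as algorithms

    values = np.array([1, 1, 2], dtype=np.int64)

    # flagged by private_function_across_module: a leading-underscore function
    # reached through an imported module
    # keys, counts = algorithms._value_counts_arraylike(values, dropna=True)

    # fine after the renames in this patch: the helper is now public
    keys, counts = algorithms.value_counts_arraylike(values, dropna=True)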
From 65407bcee68ca604e2cba85039ef79ebce9c1254 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 5 Sep 2020 10:54:30 -0700
Subject: [PATCH 31/71] CLN: unused case in compare_or_regex_search (#36143)

---
 pandas/core/array_algos/replace.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 6ac3cc1f9f2fe..09f9aefd64096 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -3,7 +3,7 @@
 """
 import operator
 import re
-from typing import Optional, Pattern, Union
+from typing import Pattern, Union
 
 import numpy as np
 
@@ -14,14 +14,10 @@
     is_numeric_v_string_like,
     is_scalar,
 )
-from pandas.core.dtypes.missing import isna
 
 
 def compare_or_regex_search(
-    a: ArrayLike,
-    b: Union[Scalar, Pattern],
-    regex: bool = False,
-    mask: Optional[ArrayLike] = None,
+    a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: ArrayLike,
 ) -> Union[ArrayLike, bool]:
     """
     Compare two array_like inputs of the same shape or two scalar values
@@ -33,8 +29,8 @@ def compare_or_regex_search(
     ----------
     a : array_like
     b : scalar or regex pattern
-    regex : bool, default False
-    mask : array_like or None (default)
+    regex : bool
+    mask : array_like
 
     Returns
     -------
@@ -68,8 +64,6 @@ def _check_comparison_types(
         )
 
     # GH#32621 use mask to avoid comparing to NAs
-    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
-        mask = np.reshape(~(isna(a)), a.shape)
     if isinstance(a, np.ndarray):
         a = a[mask]
 
From 238de4c4c3075351faacb74a392273884951474b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Sat, 5 Sep 2020 12:40:06 -0700
Subject: [PATCH 32/71] REF: window/test_dtypes.py with pytest idioms (#35918)

---
 pandas/tests/window/conftest.py    |  31 +++
 pandas/tests/window/test_dtypes.py | 315 ++++++++---------------------
 2 files changed, 118 insertions(+), 228 deletions(-)

diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index eb8252d5731be..7f03fa2a5ea0d 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -308,3 +308,34 @@ def which(request):
 def halflife_with_times(request):
     """Halflife argument for EWM when times is specified."""
     return request.param
+
+
+@pytest.fixture(
+    params=[
+        "object",
+        "category",
+        "int8",
+        "int16",
+        "int32",
+        "int64",
+        "uint8",
+        "uint16",
+        "uint32",
+        "uint64",
+        "float16",
+        "float32",
+        "float64",
+        "m8[ns]",
+        "M8[ns]",
+        pytest.param(
+            "datetime64[ns, UTC]",
+            marks=pytest.mark.skip(
+                "direct creation of extension dtype datetime64[ns, 
UTC] " + "is not supported ATM" + ), + ), + ] +) +def dtypes(request): + """Dtypes for window tests""" + return request.param diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 0aa5bf019ff5e..245b48b351684 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -1,5 +1,3 @@ -from itertools import product - import numpy as np import pytest @@ -10,234 +8,95 @@ # gh-12373 : rolling functions error on float32 data # make sure rolling functions works for different dtypes # -# NOTE that these are yielded tests and so _create_data -# is explicitly called. -# # further note that we are only checking rolling for fully dtype # compliance (though both expanding and ewm inherit) -class Dtype: - window = 2 - - funcs = { - "count": lambda v: v.count(), - "max": lambda v: v.max(), - "min": lambda v: v.min(), - "sum": lambda v: v.sum(), - "mean": lambda v: v.mean(), - "std": lambda v: v.std(), - "var": lambda v: v.var(), - "median": lambda v: v.median(), - } - - def get_expects(self): - expects = { - "sr1": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), - "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), - "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), - "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), - "mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), - "std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"), - "var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"), - "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), +def get_dtype(dtype, coerce_int=None): + if coerce_int is False and "int" in dtype: + return None + if dtype != "category": + return np.dtype(dtype) + return dtype + + +@pytest.mark.parametrize( + "method, data, expected_data, coerce_int", + [ + ("count", np.arange(5), [1, 2, 2, 2, 2], True), + ("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True), + ("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False), + ("max", np.arange(5), [np.nan, 1, 2, 3, 4], True), + ("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True), + ("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False), + ("min", np.arange(5), [np.nan, 0, 1, 2, 3], True), + ("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True), + ("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False), + ("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True), + ("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True), + ("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False), + ("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True), + ("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True), + ("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False), + ("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True), + ("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True), + ( + "std", + [0, 1, 2, np.nan, 4], + [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, + False, + ), + ("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True), + ("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True), + ("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False), + ("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True), + ("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True), + ("median", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False), + ], +) +def test_series_dtypes(method, data, expected_data, coerce_int, dtypes): + s = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int)) + if dtypes in ("m8[ns]", 
"M8[ns]") and method != "count": + msg = "No numeric types to aggregate" + with pytest.raises(DataError, match=msg): + getattr(s.rolling(2), method)() + else: + result = getattr(s.rolling(2), method)() + expected = Series(expected_data, dtype="float64") + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize( + "method, expected_data", + [ + ("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}), + ("max", {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}), + ("min", {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}), + ( + "sum", + {0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])}, + ), + ("mean", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}), + ( + "std", + { + 0: Series([np.nan] + [np.sqrt(2)] * 4), + 1: Series([np.nan] + [np.sqrt(2)] * 4), }, - "sr2": { - "count": Series([1, 2, 2, 2, 2], dtype="float64"), - "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), - "min": Series([np.nan, 8, 6, 4, 2], dtype="float64"), - "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), - "mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"), - "std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"), - "var": Series([np.nan, 2, 2, 2, 2], dtype="float64"), - "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), - }, - "sr3": { - "count": Series([1, 2, 2, 1, 1], dtype="float64"), - "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), - "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), - "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), - "mean": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"), - "std": Series( - [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, dtype="float64" - ), - "var": Series([np.nan, 0.5, 0.5, np.nan, np.nan], dtype="float64"), - "median": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"), - }, - "df": { - "count": DataFrame( - {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, - dtype="float64", - ), - "max": DataFrame( - {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, - dtype="float64", - ), - "min": DataFrame( - {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, - dtype="float64", - ), - "sum": DataFrame( - { - 0: Series([np.nan, 2, 6, 10, 14]), - 1: Series([np.nan, 4, 8, 12, 16]), - }, - dtype="float64", - ), - "mean": DataFrame( - {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, - dtype="float64", - ), - "std": DataFrame( - { - 0: Series([np.nan] + [np.sqrt(2)] * 4), - 1: Series([np.nan] + [np.sqrt(2)] * 4), - }, - dtype="float64", - ), - "var": DataFrame( - {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, - dtype="float64", - ), - "median": DataFrame( - {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, - dtype="float64", - ), - }, - } - return expects - - def _create_dtype_data(self, dtype): - sr1 = Series(np.arange(5), dtype=dtype) - sr2 = Series(np.arange(10, 0, -2), dtype=dtype) - sr3 = sr1.copy() - sr3[3] = np.NaN - df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) - - data = {"sr1": sr1, "sr2": sr2, "sr3": sr3, "df": df} - - return data - - def _create_data(self): - self.data = self._create_dtype_data(self.dtype) - self.expects = self.get_expects() - - def test_dtypes(self): - self._create_data() - for f_name, d_name in product(self.funcs.keys(), self.data.keys()): - - f = self.funcs[f_name] - d = self.data[d_name] - exp = self.expects[d_name][f_name] - self.check_dtypes(f, f_name, d, d_name, 
exp)
-
-    def check_dtypes(self, f, f_name, d, d_name, exp):
-        roll = d.rolling(window=self.window)
-        result = f(roll)
-        tm.assert_almost_equal(result, exp)
-
-
-class TestDtype_object(Dtype):
-    dtype = object
-
-
-class Dtype_integer(Dtype):
-    pass
-
-
-class TestDtype_int8(Dtype_integer):
-    dtype = np.int8
-
-
-class TestDtype_int16(Dtype_integer):
-    dtype = np.int16
-
-
-class TestDtype_int32(Dtype_integer):
-    dtype = np.int32
-
-
-class TestDtype_int64(Dtype_integer):
-    dtype = np.int64
-
-
-class Dtype_uinteger(Dtype):
-    pass
-
-
-class TestDtype_uint8(Dtype_uinteger):
-    dtype = np.uint8
-
-
-class TestDtype_uint16(Dtype_uinteger):
-    dtype = np.uint16
-
-
-class TestDtype_uint32(Dtype_uinteger):
-    dtype = np.uint32
-
-
-class TestDtype_uint64(Dtype_uinteger):
-    dtype = np.uint64
-
-
-class Dtype_float(Dtype):
-    pass
-
-
-class TestDtype_float16(Dtype_float):
-    dtype = np.float16
-
-
-class TestDtype_float32(Dtype_float):
-    dtype = np.float32
-
-
-class TestDtype_float64(Dtype_float):
-    dtype = np.float64
-
-
-class TestDtype_category(Dtype):
-    dtype = "category"
-    include_df = False
-
-    def _create_dtype_data(self, dtype):
-        sr1 = Series(range(5), dtype=dtype)
-        sr2 = Series(range(10, 0, -2), dtype=dtype)
-
-        data = {"sr1": sr1, "sr2": sr2}
-
-        return data
-
-
-class DatetimeLike(Dtype):
-    def check_dtypes(self, f, f_name, d, d_name, exp):
-
-        roll = d.rolling(window=self.window)
-        if f_name == "count":
-            result = f(roll)
-            tm.assert_almost_equal(result, exp)
-
-        else:
-            msg = "No numeric types to aggregate"
-            with pytest.raises(DataError, match=msg):
-                f(roll)
-
-
-class TestDtype_timedelta(DatetimeLike):
-    dtype = np.dtype("m8[ns]")
-
-
-class TestDtype_datetime(DatetimeLike):
-    dtype = np.dtype("M8[ns]")
-
-
-class TestDtype_datetime64UTC(DatetimeLike):
-    dtype = "datetime64[ns, UTC]"
-
-    def _create_data(self):
-        pytest.skip(
-            "direct creation of extension dtype "
-            "datetime64[ns, UTC] is not supported ATM"
-        )
+def test_dataframe_dtypes(method, expected_data, dtypes):
+    if dtypes == "category":
+        pytest.skip("Category dataframe testing not implemented.")
+    df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
+    if dtypes in ("m8[ns]", "M8[ns]") and method != "count":
+        msg = "No numeric types to aggregate"
+        with pytest.raises(DataError, match=msg):
+            getattr(df.rolling(2), method)()
+    else:
+        result = getattr(df.rolling(2), method)()
+        expected = DataFrame(expected_data, dtype="float64")
+        tm.assert_frame_equal(result, expected)
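A small sketch of the dtype contract these tests pin down (illustrative; datetime-like inputs support only count, while numeric inputs aggregate to float64):

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(5), dtype="float32")
    s.rolling(2).mean()  # works; the result comes back as float64

    dt = pd.Series(pd.date_range("2020", periods=5))
    dt.rolling(2).count()  # count works for datetime-like values
    # dt.rolling(2).mean()  # would raise DataError: No numeric types to aggregate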
From 13c0dd320191ed5124d63cf076987d5c75bc573e Mon Sep 17 00:00:00 2001
From: Fangchen Li
Date: Sat, 5 Sep 2020 14:50:43 -0500
Subject: [PATCH 33/71] DOC: add userwarning doc about mpl #35684 (#36145)

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index b1229a5d5823d..d7d2e3cf876ca 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -301,7 +301,7 @@ Plotting
 ^^^^^^^^
 
 - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`)
--
+- :meth:`DataFrame.plot` and :meth:`Series.plot` raise ``UserWarning`` about usage of FixedFormatter and FixedLocator (:issue:`35684` and :issue:`35945`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
From 435a1d09f61c1f47d51e9fc85ed0386c34073b05 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 5 Sep 2020 10:53:21 -0700
Subject: [PATCH 34/71] BUG: item_cache invalidation in get_numeric_data (#35882)

---
 doc/source/whatsnew/v1.1.2.rst              |  1 +
 pandas/core/internals/managers.py           |  1 -
 pandas/tests/frame/methods/test_cov_corr.py | 17 +++++++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index d1a66256454ca..6935a64c7572f 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -36,6 +36,7 @@ Bug fixes
 - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`)
 - Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`)
 - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`)
+- Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 57a4a8c2ace8a..13bc6a2e82195 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -691,7 +691,6 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager":
         copy : bool, default False
             Whether to copy the blocks
         """
-        self._consolidate_inplace()
         return self._combine([b for b in self.blocks if b.is_numeric], copy)
 
     def _combine(self: T, blocks: List[Block], copy: bool = True) -> T:
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index d3548b639572d..f307acd8c2178 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -191,6 +191,23 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
         expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
+    def test_corr_item_cache(self):
+        # Check that corr does not lead to incorrect entries in item_cache
+
+        df = pd.DataFrame({"A": range(10)})
+        df["B"] = range(10)[::-1]
+
+        ser = df["A"]  # populate item_cache
+        assert len(df._mgr.blocks) == 2
+
+        _ = df.corr()
+
+        # Check that the corr didn't break link between ser and df
+        ser.values[0] = 99
+        assert df.loc[0, "A"] == 99
+        assert df["A"] is ser
+        assert df.values[0, 0] == 99
+
 
 class TestDataFrameCorrWith:
     def test_corrwith(self, datetime_frame):
From 0d287523dce419be1cda1b18003a7d80d9d618ab Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Sat, 5 Sep 2020 17:18:51 -0400
Subject: [PATCH 35/71] Make MultiIndex.get_loc raise for unhashable type (#35914)

Co-authored-by: Jeff Reback
---
 doc/source/whatsnew/v1.1.2.rst                      | 1 +
 pandas/core/indexes/multi.py                        | 5 +++--
 pandas/tests/frame/indexing/test_indexing.py        | 2 +-
 pandas/tests/indexing/multiindex/test_multiindex.py | 8 ++++++++
 pandas/tests/series/indexing/test_setitem.py        | 11 ++++++++++-
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 6935a64c7572f..c6cfcc6730112 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - Regression in 
:meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) +- Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`) - Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`) - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`) - Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f66b009e6d505..080ece8547479 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2725,6 +2725,8 @@ def get_loc(self, key, method=None): "currently supported for MultiIndex" ) + hash(key) + def _maybe_to_slice(loc): """convert integer indexer to boolean mask or slice if possible""" if not isinstance(loc, np.ndarray) or loc.dtype != "int64": @@ -2739,8 +2741,7 @@ def _maybe_to_slice(loc): mask[loc] = True return mask - if not isinstance(key, (tuple, list)): - # not including list here breaks some indexing, xref #30892 + if not isinstance(key, tuple): loc = self._get_level_indexer(key, level=0) return _maybe_to_slice(loc) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index d27487dfb8aaa..e4549dfb3e68d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -2111,7 +2111,7 @@ def test_type_error_multiindex(self): ) dg = df.pivot_table(index="i", columns="c", values=["x", "y"]) - with pytest.raises(TypeError, match="is an invalid key"): + with pytest.raises(TypeError, match="unhashable type"): dg[:, 0] index = Index(range(2), name="i") diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 5e5fcd3db88d8..4565d79c632de 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas._libs.index as _index from pandas.errors import PerformanceWarning @@ -83,3 +84,10 @@ def test_nested_tuples_duplicates(self): df3 = df.copy(deep=True) df3.loc[[(dti[0], "a")], "c2"] = 1.0 tm.assert_frame_equal(df3, expected) + + def test_multiindex_get_loc_list_raises(self): + # https://github.com/pandas-dev/pandas/issues/35878 + idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = "unhashable type" + with pytest.raises(TypeError, match=msg): + idx.get_loc([]) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 3463de25ad91b..593d1c78a19e2 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1,6 +1,7 @@ import numpy as np -from pandas import NaT, Series, date_range +from pandas import MultiIndex, NaT, Series, date_range +import pandas.testing as tm class TestSetitemDT64Values: @@ -17,3 +18,11 @@ def test_setitem_none_nan(self): series[5:7] = np.nan assert series[6] is NaT + + def test_setitem_multiindex_empty_slice(self): + # 
https://github.com/pandas-dev/pandas/issues/35878 + idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + result = Series([1, 2], index=idx) + expected = result.copy() + result.loc[[]] = 0 + tm.assert_series_equal(result, expected) From 29c0bc2d858c807d30f5826a84609bfe07176e37 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 5 Sep 2020 18:36:42 -0400 Subject: [PATCH 36/71] ENH: Make explode work for sets (#35637) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/reshape.pyx | 6 ++++-- pandas/core/frame.py | 7 ++++--- pandas/core/series.py | 7 ++++--- pandas/tests/frame/methods/test_explode.py | 8 ++++++++ pandas/tests/series/methods/test_explode.py | 8 ++++++++ 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d7d2e3cf876ca..ff9e803b4990a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -103,7 +103,7 @@ Other enhancements - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) -- +- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) - .. _whatsnew_120.api_breaking.python: diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 5c6c15fb50fed..75dbb4b74aabd 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -124,7 +124,8 @@ def explode(ndarray[object] values): counts = np.zeros(n, dtype='int64') for i in range(n): v = values[i] - if c_is_list_like(v, False): + + if c_is_list_like(v, True): if len(v): counts[i] += len(v) else: @@ -138,8 +139,9 @@ def explode(ndarray[object] values): for i in range(n): v = values[i] - if c_is_list_like(v, False): + if c_is_list_like(v, True): if len(v): + v = list(v) for j in range(len(v)): result[count] = v[j] count += 1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29d6fb9aa7d56..150d6e24dbb86 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7091,10 +7091,11 @@ def explode( Notes ----- - This routine will explode list-likes including lists, tuples, + This routine will explode list-likes including lists, tuples, sets, Series, and np.ndarray. The result dtype of the subset rows will - be object. Scalars will be returned unchanged. Empty list-likes will - result in a np.nan for that row. + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of rows in the + output will be non-deterministic when exploding sets. Examples -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index d8fdaa2a60252..6cbd93135a2ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3829,10 +3829,11 @@ def explode(self, ignore_index: bool = False) -> "Series": Notes ----- - This routine will explode list-likes including lists, tuples, + This routine will explode list-likes including lists, tuples, sets, Series, and np.ndarray. The result dtype of the subset rows will - be object. Scalars will be returned unchanged. Empty list-likes will - result in a np.nan for that row. + be object. Scalars will be returned unchanged, and empty list-likes will + result in a np.nan for that row. In addition, the ordering of elements in + the output will be non-deterministic when exploding sets. 
Examples -------- diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 2bbe8ac2d5b81..bd0901387eeed 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -172,3 +172,11 @@ def test_ignore_index(): {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3] ) tm.assert_frame_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1]) + result = df.explode(column="a").sort_values(by="a") + expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 4b65e042f7b02..1f0fbd1cc5ecb 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -126,3 +126,11 @@ def test_ignore_index(): result = s.explode(ignore_index=True) expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object) tm.assert_series_equal(result, expected) + + +def test_explode_sets(): + # https://github.com/pandas-dev/pandas/issues/35614 + s = pd.Series([{"a", "b", "c"}], index=[1]) + result = s.explode().sort_values() + expected = pd.Series(["a", "b", "c"], index=[1, 1, 1]) + tm.assert_series_equal(result, expected) From c67b7076957ff43d74de34760c643a8fe3815d2d Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 5 Sep 2020 19:13:44 -0400 Subject: [PATCH 37/71] BUG: Don't raise when constructing Series from ordered set (#36054) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/construction.py | 9 ++++++--- pandas/tests/series/test_constructors.py | 10 ++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index c6cfcc6730112..b8f6d0e52d058 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -35,6 +35,7 @@ Bug fixes - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) +- Bug in :class:`Series` constructor incorrectly raising a ``TypeError`` when passed an ordered set (:issue:`36044`) - Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`) - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`) - Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 9d6c2789af25b..3812c306b8eb4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -438,7 +438,12 @@ def sanitize_array( subarr = subarr.copy() return subarr - elif isinstance(data, (list, tuple)) and len(data) > 0: + elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: + if isinstance(data, set): + # Raise only for unordered sets, e.g., not for 
dict_keys + raise TypeError("Set type is unordered") + data = list(data) + if dtype is not None: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: @@ -450,8 +455,6 @@ def sanitize_array( # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) - elif isinstance(data, abc.Set): - raise TypeError("Set type is unordered") elif lib.is_scalar(data) and index is not None and dtype is not None: data = maybe_cast_to_datetime(data, dtype) if not lib.is_scalar(data): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index bcf7039ec9039..ce078059479b4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1464,3 +1464,13 @@ def test_constructor_sparse_datetime64(self, values): arr = pd.arrays.SparseArray(values, dtype=dtype) expected = pd.Series(arr) tm.assert_series_equal(result, expected) + + def test_construction_from_ordered_collection(self): + # https://github.com/pandas-dev/pandas/issues/36044 + result = Series({"a": 1, "b": 2}.keys()) + expected = Series(["a", "b"]) + tm.assert_series_equal(result, expected) + + result = Series({"a": 1, "b": 2}.values()) + expected = Series([1, 2]) + tm.assert_series_equal(result, expected) From b8181f47af2d1e939a5e20382efc7da7aa0164c2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 6 Sep 2020 18:58:32 +0200 Subject: [PATCH 38/71] REGR: append tz-aware DataFrame with tz-naive values (#36115) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/dtypes/concat.py | 6 ++++-- pandas/core/internals/concat.py | 8 ++++++-- pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index b8f6d0e52d058..f0adc951a5f99 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Fix regression in updating a column inplace (e.g. 
using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) +- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`) - Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9902016475b22..dd005752a4832 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -148,15 +148,17 @@ def is_nonempty(x) -> bool: any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat) if any_ea: + # we ignore axis here, as internally concatting with EAs is always + # for axis=0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - if isinstance(to_concat[0], ExtensionArray) and axis == 0: + if isinstance(to_concat[0], ExtensionArray): cls = type(to_concat[0]) return cls._concat_same_type(to_concat) else: - return np.concatenate(to_concat, axis=axis) + return np.concatenate(to_concat) elif _contains_datetime or "timedelta" in typs: return concat_datetime(to_concat, axis=axis, typs=typs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index b45f0890cafa4..513c5fed1ca62 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import DatetimeArray, ExtensionArray from pandas.core.internals.blocks import make_block from pandas.core.internals.managers import BlockManager @@ -335,9 +335,13 @@ def _concatenate_join_units(join_units, concat_axis, copy): # the non-EA values are 2D arrays with shape (1, n) to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] concat_values = concat_compat(to_concat, axis=0) - if not isinstance(concat_values, ExtensionArray): + if not isinstance(concat_values, ExtensionArray) or ( + isinstance(concat_values, DatetimeArray) and concat_values.tz is None + ): # if the result of concat is not an EA but an ndarray, reshape to # 2D to put it a non-EA Block + # special case DatetimeArray, which *is* an EA, but is put in a + # consolidated 2D block concat_values = np.atleast_2d(concat_values) else: concat_values = concat_compat(to_concat, axis=concat_axis) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 38cf2cc2402a1..90705f827af25 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1110,6 +1110,23 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): result = df.append([s, s], ignore_index=True) tm.assert_frame_equal(result, expected) + def test_append_empty_tz_frame_with_datetime64ns(self): + # https://github.com/pandas-dev/pandas/issues/35460 + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + + # pd.NaT gets inferred as tz-naive, so append result is tz-naive + result = df.append({"a": pd.NaT}, ignore_index=True) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # also test with typed value to 
append + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + result = df.append( + pd.Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True + ) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + class TestConcatenate: def test_concat_copy(self): From 88b5e100971e3b435eb7f14ecc5ec469d4cc1dfa Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sun, 6 Sep 2020 12:59:43 -0400 Subject: [PATCH 39/71] BUG: Respect errors="ignore" during extension astype (#35979) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/internals/blocks.py | 9 ++++++-- pandas/tests/frame/methods/test_astype.py | 22 +++++++++++++++++++ pandas/tests/series/methods/test_astype.py | 25 +++++++++++++++++++++- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index f0adc951a5f99..1e946d325ace1 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -34,6 +34,7 @@ Bug fixes - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`) - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) - Bug in :class:`Series` constructor incorrectly raising a ``TypeError`` when passed an ordered set (:issue:`36044`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9f4e535dc787d..263c7c2b6940a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -581,8 +581,13 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): # force the copy here if self.is_extension: - # TODO: Should we try/except this astype? 
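
The user-facing effect of the try/except introduced in this hunk, as a minimal sketch (requires a build with this fix; mirrors the tests added below):

    import pandas as pd

    ser = pd.Series(["x", "y"], dtype="string")
    # an impossible cast now returns the input unchanged under errors="ignore"
    # instead of propagating the ValueError from the extension array
    out = ser.astype(float, errors="ignore")
    assert out.equals(ser)
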
- values = self.values.astype(dtype) + try: + values = self.values.astype(dtype) + except (ValueError, TypeError): + if errors == "ignore": + values = self.values + else: + raise else: if issubclass(dtype.type, str): diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index b0fd0496ea81e..d3f256259b15f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -8,6 +8,7 @@ CategoricalDtype, DataFrame, DatetimeTZDtype, + Interval, IntervalDtype, NaT, Series, @@ -565,3 +566,24 @@ def test_astype_empty_dtype_dict(self): result = df.astype(dict()) tm.assert_frame_equal(result, df) assert result is not df + + @pytest.mark.parametrize( + "df", + [ + DataFrame(Series(["x", "y", "z"], dtype="string")), + DataFrame(Series(["x", "y", "z"], dtype="category")), + DataFrame(Series(3 * [Timestamp("2020-01-01", tz="UTC")])), + DataFrame(Series(3 * [Interval(0, 1)])), + ], + ) + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): + # https://github.com/pandas-dev/pandas/issues/35471 + if errors == "ignore": + expected = df + result = df.astype(float, errors=errors) + tm.assert_frame_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + df.astype(float, errors=errors) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9fdc4179de2e1..b9d90a9fc63dd 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,4 +1,6 @@ -from pandas import Series, date_range +import pytest + +from pandas import Interval, Series, Timestamp, date_range import pandas._testing as tm @@ -23,3 +25,24 @@ def test_astype_dt64tz_to_str(self): dtype=object, ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + Series(["x", "y", "z"], dtype="string"), + Series(["x", "y", "z"], dtype="category"), + Series(3 * [Timestamp("2020-01-01", tz="UTC")]), + Series(3 * [Interval(0, 1)]), + ], + ) + @pytest.mark.parametrize("errors", ["raise", "ignore"]) + def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): + # https://github.com/pandas-dev/pandas/issues/35471 + if errors == "ignore": + expected = values + result = values.astype(float, errors="ignore") + tm.assert_series_equal(result, expected) + else: + msg = "(Cannot cast)|(could not convert)" + with pytest.raises((ValueError, TypeError), match=msg): + values.astype(float, errors=errors) From f9ce5792a6fb3ec9b7ed42fe6c7cd018756973ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 6 Sep 2020 10:05:35 -0700 Subject: [PATCH 40/71] De-privatize imported names (#36156) --- pandas/_libs/hashtable.pyx | 4 ++-- pandas/_libs/hashtable_class_helper.pxi.in | 6 +++--- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- pandas/_libs/parsers.pyx | 8 ++++---- pandas/_testing.py | 8 ++++---- pandas/compat/__init__.py | 4 ++-- pandas/core/algorithms.py | 4 ++-- pandas/core/arrays/base.py | 4 ++-- pandas/core/arrays/masked.py | 4 ++-- pandas/core/computation/check.py | 10 +++++----- pandas/core/computation/eval.py | 6 +++--- pandas/core/computation/expressions.py | 10 +++++----- pandas/core/computation/ops.py | 6 +++--- pandas/core/frame.py | 4 ++-- pandas/core/indexes/multi.py | 4 ++-- pandas/core/internals/__init__.py | 4 ++-- pandas/core/internals/blocks.py | 
4 ++-- pandas/core/internals/managers.py | 6 +++--- pandas/core/sorting.py | 2 +- pandas/core/window/common.py | 4 ++-- pandas/core/window/ewm.py | 6 +++--- pandas/core/window/rolling.py | 6 +++--- pandas/io/common.py | 6 +++--- pandas/io/excel/_base.py | 2 +- pandas/io/excel/_odfreader.py | 4 ++-- pandas/io/excel/_openpyxl.py | 4 ++-- pandas/io/excel/_pyxlsb.py | 4 ++-- pandas/io/excel/_xlrd.py | 4 ++-- pandas/io/formats/format.py | 8 ++++---- pandas/io/formats/printing.py | 4 ++-- pandas/tests/computation/test_compat.py | 6 +++--- pandas/tests/computation/test_eval.py | 12 ++++++------ pandas/tests/extension/json/array.py | 2 +- pandas/tests/frame/test_arithmetic.py | 4 ++-- pandas/tests/frame/test_query_eval.py | 6 +++--- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/io/test_pickle.py | 6 +++--- pandas/tests/test_algos.py | 2 +- .../moments/test_moments_consistency_rolling.py | 4 ++-- pandas/tests/window/test_pairwise.py | 2 +- pandas/util/_test_decorators.py | 4 ++-- 41 files changed, 101 insertions(+), 101 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ffaf6d6505955..5a0cddb0af197 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -56,7 +56,7 @@ from pandas._libs.missing cimport checknull cdef int64_t NPY_NAT = util.get_nat() -_SIZE_HINT_LIMIT = (1 << 20) + 7 +SIZE_HINT_LIMIT = (1 << 20) + 7 cdef Py_ssize_t _INIT_VEC_CAP = 128 @@ -176,7 +176,7 @@ def unique_label_indices(const int64_t[:] labels): ndarray[int64_t, ndim=1] arr Int64VectorData *ud = idx.data - kh_resize_int64(table, min(n, _SIZE_HINT_LIMIT)) + kh_resize_int64(table, min(n, SIZE_HINT_LIMIT)) with nogil: for i in range(n): diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index e0e026fe7cb5e..5e4da96d57e42 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -268,7 +268,7 @@ cdef class {{name}}HashTable(HashTable): def __cinit__(self, int64_t size_hint=1): self.table = kh_init_{{dtype}}() if size_hint is not None: - size_hint = min(size_hint, _SIZE_HINT_LIMIT) + size_hint = min(size_hint, SIZE_HINT_LIMIT) kh_resize_{{dtype}}(self.table, size_hint) def __len__(self) -> int: @@ -603,7 +603,7 @@ cdef class StringHashTable(HashTable): def __init__(self, int64_t size_hint=1): self.table = kh_init_str() if size_hint is not None: - size_hint = min(size_hint, _SIZE_HINT_LIMIT) + size_hint = min(size_hint, SIZE_HINT_LIMIT) kh_resize_str(self.table, size_hint) def __dealloc__(self): @@ -916,7 +916,7 @@ cdef class PyObjectHashTable(HashTable): def __init__(self, int64_t size_hint=1): self.table = kh_init_pymap() if size_hint is not None: - size_hint = min(size_hint, _SIZE_HINT_LIMIT) + size_hint = min(size_hint, SIZE_HINT_LIMIT) kh_resize_pymap(self.table, size_hint) def __dealloc__(self): diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 0cc0a6b192df5..fcd081f563f92 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -138,7 +138,7 @@ def duplicated_{{dtype}}(const {{c_type}}[:] values, object keep='first'): kh_{{ttype}}_t *table = kh_init_{{ttype}}() ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') - kh_resize_{{ttype}}(table, min(n, _SIZE_HINT_LIMIT)) + kh_resize_{{ttype}}(table, min(n, SIZE_HINT_LIMIT)) if keep not in ('last', 'first', False): raise ValueError('keep must be either "first", "last" or False') diff --git 
a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index fa77af6bd5a25..811e28b830921 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -67,7 +67,7 @@ from pandas._libs.khash cimport ( khiter_t, ) -from pandas.compat import _get_lzma_file, _import_lzma +from pandas.compat import get_lzma_file, import_lzma from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning from pandas.core.dtypes.common import ( @@ -82,7 +82,7 @@ from pandas.core.dtypes.common import ( ) from pandas.core.dtypes.concat import union_categoricals -lzma = _import_lzma() +lzma = import_lzma() cdef: float64_t INF = np.inf @@ -638,9 +638,9 @@ cdef class TextReader: f'zip file {zip_names}') elif self.compression == 'xz': if isinstance(source, str): - source = _get_lzma_file(lzma)(source, 'rb') + source = get_lzma_file(lzma)(source, 'rb') else: - source = _get_lzma_file(lzma)(filename=source) + source = get_lzma_file(lzma)(filename=source) else: raise ValueError(f'Unrecognized compression type: ' f'{self.compression}') diff --git a/pandas/_testing.py b/pandas/_testing.py index 04d36749a3d8c..7dba578951deb 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -25,7 +25,7 @@ from pandas._libs.lib import no_default import pandas._libs.testing as _testing from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries -from pandas.compat import _get_lzma_file, _import_lzma +from pandas.compat import get_lzma_file, import_lzma from pandas.core.dtypes.common import ( is_bool, @@ -70,7 +70,7 @@ from pandas.io.common import urlopen from pandas.io.formats.printing import pprint_thing -lzma = _import_lzma() +lzma = import_lzma() _N = 30 _K = 4 @@ -243,7 +243,7 @@ def decompress_file(path, compression): elif compression == "bz2": f = bz2.BZ2File(path, "rb") elif compression == "xz": - f = _get_lzma_file(lzma)(path, "rb") + f = get_lzma_file(lzma)(path, "rb") elif compression == "zip": zip_file = zipfile.ZipFile(path) zip_names = zip_file.namelist() @@ -288,7 +288,7 @@ def write_to_compressed(compression, path, data, dest="test"): elif compression == "bz2": compress_method = bz2.BZ2File elif compression == "xz": - compress_method = _get_lzma_file(lzma) + compress_method = get_lzma_file(lzma) else: raise ValueError(f"Unrecognized compression type: {compression}") diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index f2018a5c01711..57e378758cc78 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -77,7 +77,7 @@ def is_platform_mac() -> bool: return sys.platform == "darwin" -def _import_lzma(): +def import_lzma(): """ Importing the `lzma` module. @@ -97,7 +97,7 @@ def _import_lzma(): warnings.warn(msg) -def _get_lzma_file(lzma): +def get_lzma_file(lzma): """ Importing the `LZMAFile` class from the `lzma` module. 
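
Downstream call sites use the renamed helpers like so (a sketch; the output filename is hypothetical and lzma support is assumed to be compiled into the interpreter):

    from pandas.compat import get_lzma_file, import_lzma

    lzma = import_lzma()            # the stdlib lzma module, or a warning if absent
    LZMAFile = get_lzma_file(lzma)  # guarded access to lzma.LZMAFile

    with LZMAFile("demo.xz", "wb") as fh:  # hypothetical path
        fh.write(b"compressed payload")
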
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 50ec3714f454b..57e63daff29e4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -462,7 +462,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return f(comps, values) -def _factorize_array( +def factorize_array( values, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None ) -> Tuple[np.ndarray, np.ndarray]: """ @@ -671,7 +671,7 @@ def factorize( else: na_value = None - codes, uniques = _factorize_array( + codes, uniques = factorize_array( values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value ) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8193d65b3b30c..0c8efda5fc588 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -31,7 +31,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.algorithms import _factorize_array, unique +from pandas.core.algorithms import factorize_array, unique from pandas.core.missing import backfill_1d, pad_1d from pandas.core.sorting import nargminmax, nargsort @@ -845,7 +845,7 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray" # Complete control over factorization. arr, na_value = self._values_for_factorize() - codes, uniques = _factorize_array( + codes, uniques = factorize_array( arr, na_sentinel=na_sentinel, na_value=na_value ) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 1237dea5c1a64..31274232e2525 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core import nanops -from pandas.core.algorithms import _factorize_array, take +from pandas.core.algorithms import factorize_array, take from pandas.core.array_algos import masked_reductions from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.indexers import check_array_indexer @@ -287,7 +287,7 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: arr = self._data mask = self._mask - codes, uniques = _factorize_array(arr, na_sentinel=na_sentinel, mask=mask) + codes, uniques = factorize_array(arr, na_sentinel=na_sentinel, mask=mask) # the hashtables don't handle all different types of bits uniques = uniques.astype(self.dtype.numpy_dtype, copy=False) diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py index 4d205909b9e2e..6c7261b3b33c9 100644 --- a/pandas/core/computation/check.py +++ b/pandas/core/computation/check.py @@ -1,10 +1,10 @@ from pandas.compat._optional import import_optional_dependency ne = import_optional_dependency("numexpr", raise_on_missing=False, on_version="warn") -_NUMEXPR_INSTALLED = ne is not None -if _NUMEXPR_INSTALLED: - _NUMEXPR_VERSION = ne.__version__ +NUMEXPR_INSTALLED = ne is not None +if NUMEXPR_INSTALLED: + NUMEXPR_VERSION = ne.__version__ else: - _NUMEXPR_VERSION = None + NUMEXPR_VERSION = None -__all__ = ["_NUMEXPR_INSTALLED", "_NUMEXPR_VERSION"] +__all__ = ["NUMEXPR_INSTALLED", "NUMEXPR_VERSION"] diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index b74f99fca21c7..f6a7935142a32 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -38,10 +38,10 @@ def _check_engine(engine: Optional[str]) -> str: str Engine name. 
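
The renamed flag drives engine selection; the decision reduces to the following sketch (pick_engine is illustrative only, not the actual pandas source):

    from pandas.core.computation.check import NUMEXPR_INSTALLED

    def pick_engine(engine=None):
        # mirrors _check_engine: default to numexpr only when it is importable
        if engine is None:
            return "numexpr" if NUMEXPR_INSTALLED else "python"
        if engine == "numexpr" and not NUMEXPR_INSTALLED:
            raise ImportError("'numexpr' is not installed or an unsupported version")
        return engine
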
""" - from pandas.core.computation.check import _NUMEXPR_INSTALLED + from pandas.core.computation.check import NUMEXPR_INSTALLED if engine is None: - engine = "numexpr" if _NUMEXPR_INSTALLED else "python" + engine = "numexpr" if NUMEXPR_INSTALLED else "python" if engine not in _engines: valid_engines = list(_engines.keys()) @@ -53,7 +53,7 @@ def _check_engine(engine: Optional[str]) -> str: # that won't necessarily be import-able) # Could potentially be done on engine instantiation if engine == "numexpr": - if not _NUMEXPR_INSTALLED: + if not NUMEXPR_INSTALLED: raise ImportError( "'numexpr' is not installed or an unsupported version. Cannot use " "engine='numexpr' for query/eval if 'numexpr' is not installed" diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index a9c0cb0571446..d2c08c343ab4b 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -15,15 +15,15 @@ from pandas.core.dtypes.generic import ABCDataFrame -from pandas.core.computation.check import _NUMEXPR_INSTALLED +from pandas.core.computation.check import NUMEXPR_INSTALLED from pandas.core.ops import roperator -if _NUMEXPR_INSTALLED: +if NUMEXPR_INSTALLED: import numexpr as ne _TEST_MODE = None _TEST_RESULT: List[bool] = list() -_USE_NUMEXPR = _NUMEXPR_INSTALLED +_USE_NUMEXPR = NUMEXPR_INSTALLED _evaluate = None _where = None @@ -40,7 +40,7 @@ def set_use_numexpr(v=True): # set/unset to use numexpr global _USE_NUMEXPR - if _NUMEXPR_INSTALLED: + if NUMEXPR_INSTALLED: _USE_NUMEXPR = v # choose what we are going to do @@ -53,7 +53,7 @@ def set_use_numexpr(v=True): def set_numexpr_threads(n=None): # if we are using numexpr, set the threads to n # otherwise reset - if _NUMEXPR_INSTALLED and _USE_NUMEXPR: + if NUMEXPR_INSTALLED and _USE_NUMEXPR: if n is None: n = ne.detect_number_of_cores() ne.set_num_threads(n) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index b2144c45c6323..1fb3910b8577d 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -600,11 +600,11 @@ def __repr__(self) -> str: class FuncNode: def __init__(self, name: str): - from pandas.core.computation.check import _NUMEXPR_INSTALLED, _NUMEXPR_VERSION + from pandas.core.computation.check import NUMEXPR_INSTALLED, NUMEXPR_VERSION if name not in _mathops or ( - _NUMEXPR_INSTALLED - and _NUMEXPR_VERSION < LooseVersion("2.6.9") + NUMEXPR_INSTALLED + and NUMEXPR_VERSION < LooseVersion("2.6.9") and name in ("floor", "ceil") ): raise ValueError(f'"{name}" is not a supported function') diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 150d6e24dbb86..e1a889bf79d95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5257,7 +5257,7 @@ def duplicated( 4 True dtype: bool """ - from pandas._libs.hashtable import _SIZE_HINT_LIMIT, duplicated_int64 + from pandas._libs.hashtable import SIZE_HINT_LIMIT, duplicated_int64 from pandas.core.sorting import get_group_index @@ -5266,7 +5266,7 @@ def duplicated( def f(vals): labels, shape = algorithms.factorize( - vals, size_hint=min(len(self), _SIZE_HINT_LIMIT) + vals, size_hint=min(len(self), SIZE_HINT_LIMIT) ) return labels.astype("i8", copy=False), len(shape) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 080ece8547479..e49a23935efbd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1342,9 +1342,9 @@ def format( ) if adjoin: - from pandas.io.formats.format import _get_adjustment + from 
pandas.io.formats.format import get_adjustment - adj = _get_adjustment() + adj = get_adjustment() return adj.adjoin(space, *result_levels).split("\n") else: return result_levels diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index e12e0d7760ea7..fbccac1c2af67 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -10,8 +10,8 @@ IntBlock, ObjectBlock, TimeDeltaBlock, - _safe_reshape, make_block, + safe_reshape, ) from pandas.core.internals.concat import concatenate_block_managers from pandas.core.internals.managers import ( @@ -33,7 +33,7 @@ "IntBlock", "ObjectBlock", "TimeDeltaBlock", - "_safe_reshape", + "safe_reshape", "make_block", "BlockManager", "SingleBlockManager", diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 263c7c2b6940a..c8da04fbbf987 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1678,7 +1678,7 @@ def putmask( if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask): new = new[mask] - mask = _safe_reshape(mask, new_values.shape) + mask = safe_reshape(mask, new_values.shape) new_values[mask] = new return [self.make_block(values=new_values)] @@ -2820,7 +2820,7 @@ def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: return values -def _safe_reshape(arr, new_shape): +def safe_reshape(arr, new_shape): """ If possible, reshape `arr` to have shape `new_shape`, with a couple of exceptions (see gh-13012): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 13bc6a2e82195..3f446874ffd0e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -47,10 +47,10 @@ DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, - _safe_reshape, extend_blocks, get_block_type, make_block, + safe_reshape, ) from pandas.core.internals.ops import blockwise_all, operate_blockwise @@ -1015,7 +1015,7 @@ def value_getitem(placement): else: if value.ndim == self.ndim - 1: - value = _safe_reshape(value, (1,) + value.shape) + value = safe_reshape(value, (1,) + value.shape) def value_getitem(placement): return value @@ -1138,7 +1138,7 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False): if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype): # TODO(EA2D): special case not needed with 2D EAs - value = _safe_reshape(value, (1,) + value.shape) + value = safe_reshape(value, (1,) + value.shape) block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 8bdd466ae6f33..d03b2f29521b7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -520,7 +520,7 @@ def compress_group_index(group_index, sort: bool = True): space can be huge, so this function compresses it, by computing offsets (comp_ids) into the list of unique labels (obs_group_ids). 
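
The capped size hint below bounds the initial hash-table allocation; the pattern with the renamed constant looks like this (internal pandas API, sketch only):

    from pandas._libs import hashtable

    n = 50_000_000
    # never pre-size beyond SIZE_HINT_LIMIT buckets, even for huge inputs
    table = hashtable.Int64HashTable(min(n, hashtable.SIZE_HINT_LIMIT))
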
""" - size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) + size_hint = min(len(group_index), hashtable.SIZE_HINT_LIMIT) table = hashtable.Int64HashTable(size_hint) group_index = ensure_int64(group_index) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 2f3058db4493b..df60d2dcf5e84 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -92,7 +92,7 @@ def f(x, name=name, *args): return self._groupby.apply(f) -def _flex_binary_moment(arg1, arg2, f, pairwise=False): +def flex_binary_moment(arg1, arg2, f, pairwise=False): if not ( isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) @@ -222,7 +222,7 @@ def dataframe_from_int_dict(data, frame_template): return dataframe_from_int_dict(results, arg1) else: - return _flex_binary_moment(arg2, arg1, f) + return flex_binary_moment(arg2, arg1, f) def zsqrt(x): diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 1913b51a68c15..2bd36d8bff155 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -15,7 +15,7 @@ import pandas.core.common as common from pandas.core.window.common import _doc_template, _shared_docs, zsqrt -from pandas.core.window.rolling import _flex_binary_moment, _Rolling +from pandas.core.window.rolling import _Rolling, flex_binary_moment _bias_template = """ Parameters @@ -416,7 +416,7 @@ def _get_cov(X, Y): ) return X._wrap_result(cov) - return _flex_binary_moment( + return flex_binary_moment( self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) ) @@ -470,6 +470,6 @@ def _cov(x, y): corr = cov / zsqrt(x_var * y_var) return X._wrap_result(corr) - return _flex_binary_moment( + return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 558c0eeb0ea65..4c4ec4d700b7f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -54,8 +54,8 @@ from pandas.core.window.common import ( WindowGroupByMixin, _doc_template, - _flex_binary_moment, _shared_docs, + flex_binary_moment, zsqrt, ) from pandas.core.window.indexers import ( @@ -1774,7 +1774,7 @@ def _get_cov(X, Y): bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj - return _flex_binary_moment( + return flex_binary_moment( self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) ) @@ -1913,7 +1913,7 @@ def _get_corr(a, b): return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) - return _flex_binary_moment( + return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) ) diff --git a/pandas/io/common.py b/pandas/io/common.py index a80b89569f429..3f130401558dd 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -40,12 +40,12 @@ ModeVar, StorageOptions, ) -from pandas.compat import _get_lzma_file, _import_lzma +from pandas.compat import get_lzma_file, import_lzma from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import is_file_like -lzma = _import_lzma() +lzma = import_lzma() _VALID_URLS = set(uses_relative + uses_netloc + uses_params) @@ -562,7 +562,7 @@ def get_handle( # XZ Compression elif compression == "xz": - f = _get_lzma_file(lzma)(path_or_buf, mode) + f = get_lzma_file(lzma)(path_or_buf, mode) # Unrecognized Compression else: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 74eb65521f5b2..87343c22ad4e9 100644 --- 
a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -346,7 +346,7 @@ def read_excel( ) -class _BaseExcelReader(metaclass=abc.ABCMeta): +class BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): # If filepath_or_buffer is a url, load the data into a BytesIO if is_url(filepath_or_buffer): diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6cbca59aed97e..02575ab878f6e 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -7,10 +7,10 @@ import pandas as pd -from pandas.io.excel._base import _BaseExcelReader +from pandas.io.excel._base import BaseExcelReader -class _ODFReader(_BaseExcelReader): +class _ODFReader(BaseExcelReader): """ Read tables out of OpenDocument formatted files. diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 89b581da6ed31..f395127902101 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -5,7 +5,7 @@ from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions from pandas.compat._optional import import_optional_dependency -from pandas.io.excel._base import ExcelWriter, _BaseExcelReader +from pandas.io.excel._base import BaseExcelReader, ExcelWriter from pandas.io.excel._util import validate_freeze_panes if TYPE_CHECKING: @@ -438,7 +438,7 @@ def write_cells( setattr(xcell, k, v) -class _OpenpyxlReader(_BaseExcelReader): +class _OpenpyxlReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePathOrBuffer, diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index c15a52abe4d53..069c3a2eaa643 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -3,10 +3,10 @@ from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions from pandas.compat._optional import import_optional_dependency -from pandas.io.excel._base import _BaseExcelReader +from pandas.io.excel._base import BaseExcelReader -class _PyxlsbReader(_BaseExcelReader): +class _PyxlsbReader(BaseExcelReader): def __init__( self, filepath_or_buffer: FilePathOrBuffer, diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index a7fb519af61c6..9057106fb08e5 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -5,10 +5,10 @@ from pandas._typing import StorageOptions from pandas.compat._optional import import_optional_dependency -from pandas.io.excel._base import _BaseExcelReader +from pandas.io.excel._base import BaseExcelReader -class _XlrdReader(_BaseExcelReader): +class _XlrdReader(BaseExcelReader): def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): """ Reader using xlrd engine. 
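
All four engine readers above share the renamed base class; a skeletal third-party reader would look like this (the class name is hypothetical, and load_workbook is one of several abstract hooks the base class expects):

    from pandas.io.excel._base import BaseExcelReader

    class _MyEngineReader(BaseExcelReader):  # hypothetical engine wrapper
        def load_workbook(self, filepath_or_buffer):
            # a real reader would open the workbook with its engine here
            raise NotImplementedError
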
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3dc4290953360..53b2b533215f0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -256,7 +256,7 @@ def __init__( float_format = get_option("display.float_format") self.float_format = float_format self.dtype = dtype - self.adj = _get_adjustment() + self.adj = get_adjustment() self._chk_truncate() @@ -439,7 +439,7 @@ def _get_pad(t): return [x.rjust(_get_pad(x)) for x in texts] -def _get_adjustment() -> TextAdjustment: +def get_adjustment() -> TextAdjustment: use_east_asian_width = get_option("display.unicode.east_asian_width") if use_east_asian_width: return EastAsianTextAdjustment() @@ -628,7 +628,7 @@ def __init__( self.columns = frame.columns self._chk_truncate() - self.adj = _get_adjustment() + self.adj = get_adjustment() def _chk_truncate(self) -> None: """ @@ -1733,7 +1733,7 @@ def _make_fixed_width( return strings if adj is None: - adj = _get_adjustment() + adj = get_adjustment() max_len = max(adj.len(x) for x in strings) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 23daab725ec65..edc6fbfff61d7 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -321,7 +321,7 @@ def format_object_summary( summary string """ from pandas.io.formats.console import get_console_size - from pandas.io.formats.format import _get_adjustment + from pandas.io.formats.format import get_adjustment display_width, _ = get_console_size() if display_width is None: @@ -350,7 +350,7 @@ def format_object_summary( is_truncated = n > max_seq_items # adj can optionally handle unicode eastern asian width - adj = _get_adjustment() + adj = get_adjustment() def _extend_line( s: str, line: str, value: str, display_width: int, next_line_prefix: str diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index b3fbd8c17d8bf..ead102f532a20 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -12,16 +12,16 @@ def test_compat(): # test we have compat with our version of nu - from pandas.core.computation.check import _NUMEXPR_INSTALLED + from pandas.core.computation.check import NUMEXPR_INSTALLED try: import numexpr as ne ver = ne.__version__ if LooseVersion(ver) < LooseVersion(VERSIONS["numexpr"]): - assert not _NUMEXPR_INSTALLED + assert not NUMEXPR_INSTALLED else: - assert _NUMEXPR_INSTALLED + assert NUMEXPR_INSTALLED except ImportError: pytest.skip("not testing numexpr version compat") diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 853ab00853d1b..49066428eb16c 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -18,7 +18,7 @@ from pandas import DataFrame, Series, compat, date_range import pandas._testing as tm from pandas.core.computation import pytables -from pandas.core.computation.check import _NUMEXPR_VERSION +from pandas.core.computation.check import NUMEXPR_VERSION from pandas.core.computation.engines import NumExprClobberingError, _engines import pandas.core.computation.expr as expr from pandas.core.computation.expr import ( @@ -26,7 +26,7 @@ PandasExprVisitor, PythonExprVisitor, ) -from pandas.core.computation.expressions import _NUMEXPR_INSTALLED, _USE_NUMEXPR +from pandas.core.computation.expressions import _USE_NUMEXPR, NUMEXPR_INSTALLED from pandas.core.computation.ops import ( _arith_ops_syms, _binary_math_ops, @@ -43,7 +43,7 @@ marks=pytest.mark.skipif( engine == 
"numexpr" and not _USE_NUMEXPR, reason=f"numexpr enabled->{_USE_NUMEXPR}, " - f"installed->{_NUMEXPR_INSTALLED}", + f"installed->{NUMEXPR_INSTALLED}", ), ) for engine in _engines @@ -60,15 +60,15 @@ def parser(request): @pytest.fixture def ne_lt_2_6_9(): - if _NUMEXPR_INSTALLED and _NUMEXPR_VERSION >= LooseVersion("2.6.9"): + if NUMEXPR_INSTALLED and NUMEXPR_VERSION >= LooseVersion("2.6.9"): pytest.skip("numexpr is >= 2.6.9") return "numexpr" @pytest.fixture def unary_fns_for_ne(): - if _NUMEXPR_INSTALLED: - if _NUMEXPR_VERSION >= LooseVersion("2.6.9"): + if NUMEXPR_INSTALLED: + if NUMEXPR_VERSION >= LooseVersion("2.6.9"): return _unary_math_ops else: return tuple(x for x in _unary_math_ops if x not in ("floor", "ceil")) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 447a6108fc3c7..e3cdeb9c1951f 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -189,7 +189,7 @@ def _concat_same_type(cls, to_concat): def _values_for_factorize(self): frozen = self._values_for_argsort() if len(frozen) == 0: - # _factorize_array expects 1-d array, this is a len-0 2-d array. + # factorize_array expects 1-d array, this is a len-0 2-d array. frozen = frozen.ravel() return frozen, () diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index e17357e9845b5..70d0b4e9e835c 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,7 +11,7 @@ from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm import pandas.core.common as com -from pandas.core.computation.expressions import _MIN_ELEMENTS, _NUMEXPR_INSTALLED +from pandas.core.computation.expressions import _MIN_ELEMENTS, NUMEXPR_INSTALLED from pandas.tests.frame.common import _check_mixed_float, _check_mixed_int # ------------------------------------------------------------------- @@ -375,7 +375,7 @@ def test_floordiv_axis0(self): result2 = df.floordiv(ser.values, axis=0) tm.assert_frame_equal(result2, expected) - @pytest.mark.skipif(not _NUMEXPR_INSTALLED, reason="numexpr not installed") + @pytest.mark.skipif(not NUMEXPR_INSTALLED, reason="numexpr not installed") @pytest.mark.parametrize("opname", ["floordiv", "pow"]) def test_floordiv_axis0_numexpr_path(self, opname): # case that goes through numexpr and has to fall back to masked_arith_op diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 56d178daee7fd..2994482fa5139 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -9,7 +9,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, date_range import pandas._testing as tm -from pandas.core.computation.check import _NUMEXPR_INSTALLED +from pandas.core.computation.check import NUMEXPR_INSTALLED PARSERS = "python", "pandas" ENGINES = "python", pytest.param("numexpr", marks=td.skip_if_no_ne) @@ -39,7 +39,7 @@ def setup_method(self, method): def test_query_default(self): # GH 12749 - # this should always work, whether _NUMEXPR_INSTALLED or not + # this should always work, whether NUMEXPR_INSTALLED or not df = self.df result = df.query("A>0") tm.assert_frame_equal(result, self.expected1) @@ -65,7 +65,7 @@ def test_query_python(self): def test_query_numexpr(self): df = self.df - if _NUMEXPR_INSTALLED: + if NUMEXPR_INSTALLED: result = df.query("A>0", engine="numexpr") tm.assert_frame_equal(result, self.expected1) result = df.eval("A+1", engine="numexpr") diff 
--git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 22942ed75d0f3..1fb957505987f 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -226,7 +226,7 @@ def test_repr_truncation(self): r = repr(df) r = r[r.find("\n") + 1 :] - adj = fmt._get_adjustment() + adj = fmt.get_adjustment() for line, value in zip(r.split("\n"), df["B"]): if adj.len(value) + 1 > max_len: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index d1c6705dd7a6f..2241fe7013568 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -24,7 +24,7 @@ import pytest -from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian +from pandas.compat import get_lzma_file, import_lzma, is_platform_little_endian import pandas.util._test_decorators as td import pandas as pd @@ -33,7 +33,7 @@ from pandas.tseries.offsets import Day, MonthEnd -lzma = _import_lzma() +lzma = import_lzma() @pytest.fixture(scope="module") @@ -268,7 +268,7 @@ def compress_file(self, src_path, dest_path, compression): with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: f.write(src_path, os.path.basename(src_path)) elif compression == "xz": - f = _get_lzma_file(lzma)(dest_path, "w") + f = get_lzma_file(lzma)(dest_path, "w") else: msg = f"Unrecognized compression type: {compression}" raise ValueError(msg) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 72a679d980641..ec7413514d430 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -303,7 +303,7 @@ def test_parametrized_factorize_na_value_default(self, data): ], ) def test_parametrized_factorize_na_value(self, data, na_value): - codes, uniques = algos._factorize_array(data, na_value=na_value) + codes, uniques = algos.factorize_array(data, na_value=na_value) expected_uniques = data[[1, 3]] expected_codes = np.array([-1, 0, -1, 1], dtype=np.intp) tm.assert_numpy_array_equal(codes, expected_codes) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 158b994cf03ae..dfcbdde466d44 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, DatetimeIndex, Index, Series import pandas._testing as tm -from pandas.core.window.common import _flex_binary_moment +from pandas.core.window.common import flex_binary_moment from pandas.tests.window.common import ( check_pairwise_moment, moments_consistency_cov_data, @@ -150,7 +150,7 @@ def test_flex_binary_moment(): # don't blow the stack msg = "arguments to moment function must be of type np.ndarray/Series/DataFrame" with pytest.raises(TypeError, match=msg): - _flex_binary_moment(5, 6, None) + flex_binary_moment(5, 6, None) def test_corr_sanity(): diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 7425cc5df4c2f..7f4e85b385b2d 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -41,7 +41,7 @@ def compare(self, result, expected): @pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()]) def test_no_flex(self, f): - # DataFrame methods (which do not call _flex_binary_moment()) + # DataFrame methods (which do not call flex_binary_moment()) results = [f(df) for df in self.df1s] for (df, result) in 
zip(self.df1s, results): diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 78facd6694635..94c252eca1671 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -35,7 +35,7 @@ def test_foo(): from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import _np_version -from pandas.core.computation.expressions import _NUMEXPR_INSTALLED, _USE_NUMEXPR +from pandas.core.computation.expressions import _USE_NUMEXPR, NUMEXPR_INSTALLED def safe_import(mod_name: str, min_version: Optional[str] = None): @@ -196,7 +196,7 @@ def skip_if_no(package: str, min_version: Optional[str] = None): ) skip_if_no_ne = pytest.mark.skipif( not _USE_NUMEXPR, - reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{_NUMEXPR_INSTALLED}", + reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{NUMEXPR_INSTALLED}", ) From 911e997579536a059b2438b6b861b0de18a6778c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 6 Sep 2020 10:08:26 -0700 Subject: [PATCH 41/71] REF: share more EA methods (#36154) --- pandas/core/arrays/_mixins.py | 33 +++++++- pandas/core/arrays/categorical.py | 126 ++--------------------------- pandas/core/arrays/datetimelike.py | 28 ++----- pandas/core/arrays/numpy_.py | 12 +-- 4 files changed, 45 insertions(+), 154 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 2976747d66dfa..8b79f8ce66756 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -4,9 +4,10 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, doc -from pandas.core.algorithms import take, unique +from pandas.core.algorithms import searchsorted, take, unique +from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray _T = TypeVar("_T", bound="NDArrayBackedExtensionArray") @@ -120,3 +121,31 @@ def repeat(self: _T, repeats, axis=None) -> _T: def unique(self: _T) -> _T: new_data = unique(self._ndarray) return self._from_backing_data(new_data) + + @classmethod + @doc(ExtensionArray._concat_same_type) + def _concat_same_type(cls, to_concat, axis: int = 0): + dtypes = {str(x.dtype) for x in to_concat} + if len(dtypes) != 1: + raise ValueError("to_concat must have the same dtype (tz)", dtypes) + + new_values = [x._ndarray for x in to_concat] + new_values = np.concatenate(new_values, axis=axis) + return to_concat[0]._from_backing_data(new_values) + + @doc(ExtensionArray.searchsorted) + def searchsorted(self, value, side="left", sorter=None): + return searchsorted(self._ndarray, value, side=side, sorter=sorter) + + @doc(ExtensionArray.shift) + def shift(self, periods=1, fill_value=None, axis=0): + + fill_value = self._validate_shift_value(fill_value) + new_values = shift(self._ndarray, periods, axis, fill_value) + + return self._from_backing_data(new_values) + + def _validate_shift_value(self, fill_value): + # TODO: after deprecation in datetimelikearraymixin is enforced, + # we can remove this and ust validate_fill_value directly + return self._validate_fill_value(fill_value) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c3c9009dda659..02305479bef67 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -44,8 +44,7 @@ from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as 
algorithms from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d -from pandas.core.array_algos.transforms import shift -from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.base import ( ExtensionArray, NoNewAttributesMixin, @@ -1193,35 +1192,6 @@ def map(self, mapper): __le__ = _cat_compare_op(operator.le) __ge__ = _cat_compare_op(operator.ge) - def shift(self, periods, fill_value=None): - """ - Shift Categorical by desired number of periods. - - Parameters - ---------- - periods : int - Number of periods to move, can be positive or negative - fill_value : object, optional - The scalar value to use for newly introduced missing values. - - .. versionadded:: 0.24.0 - - Returns - ------- - shifted : Categorical - """ - # since categoricals always have ndim == 1, an axis parameter - # doesn't make any sense here. - codes = self.codes - if codes.ndim > 1: - raise NotImplementedError("Categorical with ndim > 1.") - - fill_value = self._validate_fill_value(fill_value) - - codes = shift(codes, periods, axis=0, fill_value=fill_value) - - return self._constructor(codes, dtype=self.dtype, fastpath=True) - def _validate_fill_value(self, fill_value): """ Convert a user-facing fill_value to a representation to use with our @@ -1383,20 +1353,6 @@ def notna(self): notnull = notna - def dropna(self): - """ - Return the Categorical without null values. - - Missing values (-1 in .codes) are detected. - - Returns - ------- - valid : Categorical - """ - result = self[self.notna()] - - return result - def value_counts(self, dropna=True): """ Return a Series containing counts of each category. @@ -1749,81 +1705,6 @@ def fillna(self, value=None, method=None, limit=None): return self._constructor(codes, dtype=self.dtype, fastpath=True) - def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T: - """ - Take elements from the Categorical. - - Parameters - ---------- - indexer : sequence of int - The indices in `self` to take. The meaning of negative values in - `indexer` depends on the value of `allow_fill`. - allow_fill : bool, default False - How to handle negative values in `indexer`. - - * False: negative values in `indices` indicate positional indices - from the right. This is similar to - :func:`numpy.take`. - - * True: negative values in `indices` indicate missing values - (the default). These values are set to `fill_value`. Any other - other negative values raise a ``ValueError``. - - .. versionchanged:: 1.0.0 - - Default value changed from ``True`` to ``False``. - - fill_value : object - The value to use for `indices` that are missing (-1), when - ``allow_fill=True``. This should be the category, i.e. a value - in ``self.categories``, not a code. - - Returns - ------- - Categorical - This Categorical will have the same categories and ordered as - `self`. - - See Also - -------- - Series.take : Similar method for Series. - numpy.ndarray.take : Similar method for NumPy arrays. - - Examples - -------- - >>> cat = pd.Categorical(['a', 'a', 'b']) - >>> cat - ['a', 'a', 'b'] - Categories (2, object): ['a', 'b'] - - Specify ``allow_fill==False`` to have negative indices mean indexing - from the right. - - >>> cat.take([0, -1, -2], allow_fill=False) - ['a', 'b', 'a'] - Categories (2, object): ['a', 'b'] - - With ``allow_fill=True``, indices equal to ``-1`` mean "missing" - values that should be filled with the `fill_value`, which is - ``np.nan`` by default. 
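
The docstring examples deleted here remain valid: take and shift are now simply inherited from NDArrayBackedExtensionArray, e.g. this sketch against a build with the patch:

    import pandas as pd

    cat = pd.Categorical(["a", "a", "b"])
    # -1 marks missing positions when allow_fill=True; the fill defaults to NaN
    print(cat.take([0, -1, 1], allow_fill=True))  # ['a', NaN, 'b'], plus Categories line
    print(cat.shift(1, fill_value="a"))           # ['a', 'a', 'a'], plus Categories line
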
- - >>> cat.take([0, -1, -1], allow_fill=True) - ['a', NaN, NaN] - Categories (2, object): ['a', 'b'] - - The fill value can be specified. - - >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') - ['a', 'a', 'a'] - Categories (2, object): ['a', 'b'] - - Specifying a fill value that's not in ``self.categories`` - will raise a ``ValueError``. - """ - return NDArrayBackedExtensionArray.take( - self, indexer, allow_fill=allow_fill, fill_value=fill_value - ) - # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat @@ -1861,6 +1742,9 @@ def __contains__(self, key) -> bool: return contains(self, key, container=self._codes) + # ------------------------------------------------------------------ + # Rendering Methods + def _tidy_repr(self, max_vals=10, footer=True) -> str: """ a short repr displaying only max_vals and an optional (but default @@ -1959,6 +1843,8 @@ def __repr__(self) -> str: return result + # ------------------------------------------------------------------ + def _maybe_coerce_indexer(self, indexer): """ return an indexer coerced to the codes dtype diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5a44f87400b79..a5b8032974fa4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -54,9 +54,8 @@ from pandas.core import missing, nanops, ops from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts -from pandas.core.array_algos.transforms import shift from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray -from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays.base import ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer @@ -672,18 +671,11 @@ def view(self, dtype=None): @classmethod def _concat_same_type(cls, to_concat, axis: int = 0): - - # do not pass tz to set because tzlocal cannot be hashed - dtypes = {str(x.dtype) for x in to_concat} - if len(dtypes) != 1: - raise ValueError("to_concat must have the same dtype (tz)", dtypes) + new_obj = super()._concat_same_type(to_concat, axis) obj = to_concat[0] dtype = obj.dtype - i8values = [x.asi8 for x in to_concat] - values = np.concatenate(i8values, axis=axis) - new_freq = None if is_period_dtype(dtype): new_freq = obj.freq @@ -697,11 +689,13 @@ def _concat_same_type(cls, to_concat, axis: int = 0): if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): new_freq = obj.freq - return cls._simple_new(values, dtype=dtype, freq=new_freq) + new_obj._freq = new_freq + return new_obj def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT: - values = self.asi8.copy() - return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq) + new_obj = super().copy() + new_obj._freq = self.freq + return new_obj def _values_for_factorize(self): return self.asi8, iNaT @@ -713,14 +707,6 @@ def _from_factorized(cls, values, original): def _values_for_argsort(self): return self._data - @Appender(ExtensionArray.shift.__doc__) - def shift(self, periods=1, fill_value=None, axis=0): - - fill_value = self._validate_shift_value(fill_value) - new_values = shift(self._data, periods, axis, fill_value) - - return type(self)._simple_new(new_values, dtype=self.dtype) - # ------------------------------------------------------------------ # Validation Methods # TODO: try to de-duplicate these, ensure identical behavior diff --git 
a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 23a4a70734c81..588d68514649a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -7,7 +7,6 @@ from pandas._libs import lib from pandas._typing import Scalar from pandas.compat.numpy import function as nv -from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.dtypes import ExtensionDtype @@ -16,10 +15,9 @@ from pandas import compat from pandas.core import nanops, ops -from pandas.core.algorithms import searchsorted from pandas.core.array_algos import masked_reductions from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays.base import ExtensionOpsMixin from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import backfill_1d, pad_1d @@ -189,10 +187,6 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray def _from_factorized(cls, values, original) -> "PandasArray": return cls(values) - @classmethod - def _concat_same_type(cls, to_concat) -> "PandasArray": - return cls(np.concatenate(to_concat)) - def _from_backing_data(self, arr: np.ndarray) -> "PandasArray": return type(self)(arr) @@ -423,10 +417,6 @@ def to_numpy( return result - @doc(ExtensionArray.searchsorted) - def searchsorted(self, value, side="left", sorter=None): - return searchsorted(self.to_numpy(), value, side=side, sorter=sorter) - # ------------------------------------------------------------------------ # Ops From 4480b4a40a6c50749dd9c885a0807acca70f2326 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sun, 6 Sep 2020 13:11:04 -0400 Subject: [PATCH 42/71] CLN: Separate transform tests (#36146) --- pandas/tests/frame/apply/test_frame_apply.py | 49 +------------ .../tests/frame/apply/test_frame_transform.py | 72 +++++++++++++++++++ pandas/tests/frame/common.py | 24 +++++++ .../tests/series/apply/test_series_apply.py | 31 +------- .../series/apply/test_series_transform.py | 59 +++++++++++++++ 5 files changed, 157 insertions(+), 78 deletions(-) create mode 100644 pandas/tests/frame/apply/test_frame_transform.py create mode 100644 pandas/tests/series/apply/test_series_transform.py diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 5a1e448beb40f..bc09501583e2c 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -1,7 +1,6 @@ from collections import OrderedDict from datetime import datetime from itertools import chain -import operator import warnings import numpy as np @@ -14,6 +13,7 @@ import pandas._testing as tm from pandas.core.apply import frame_apply from pandas.core.base import SpecificationError +from pandas.tests.frame.common import zip_frames @pytest.fixture @@ -1058,25 +1058,6 @@ def test_consistency_for_boxed(self, box, int_frame_const_col): tm.assert_frame_equal(result, expected) -def zip_frames(frames, axis=1): - """ - take a list of frames, zip them together under the - assumption that these all have the first frames' index/columns. 
- - Returns - ------- - new_frame : DataFrame - """ - if axis == 1: - columns = frames[0].columns - zipped = [f.loc[:, c] for c in columns for f in frames] - return pd.concat(zipped, axis=1) - else: - index = frames[0].index - zipped = [f.loc[i, :] for i in index for f in frames] - return pd.DataFrame(zipped) - - class TestDataFrameAggregate: def test_agg_transform(self, axis, float_frame): other_axis = 1 if axis in {0, "index"} else 0 @@ -1087,16 +1068,10 @@ def test_agg_transform(self, axis, float_frame): f_sqrt = np.sqrt(float_frame) # ufunc - result = float_frame.transform(np.sqrt, axis=axis) expected = f_sqrt.copy() - tm.assert_frame_equal(result, expected) - result = float_frame.apply(np.sqrt, axis=axis) tm.assert_frame_equal(result, expected) - result = float_frame.transform(np.sqrt, axis=axis) - tm.assert_frame_equal(result, expected) - # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() @@ -1110,9 +1085,6 @@ def test_agg_transform(self, axis, float_frame): ) tm.assert_frame_equal(result, expected) - result = float_frame.transform([np.sqrt], axis=axis) - tm.assert_frame_equal(result, expected) - # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting @@ -1128,38 +1100,19 @@ def test_agg_transform(self, axis, float_frame): ) tm.assert_frame_equal(result, expected) - result = float_frame.transform([np.abs, "sqrt"], axis=axis) - tm.assert_frame_equal(result, expected) - def test_transform_and_agg_err(self, axis, float_frame): # cannot both transform and agg - msg = "transforms cannot produce aggregated results" - with pytest.raises(ValueError, match=msg): - float_frame.transform(["max", "min"], axis=axis) - msg = "cannot combine transform and aggregation operations" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): float_frame.agg(["max", "sqrt"], axis=axis) - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - float_frame.transform(["max", "sqrt"], axis=axis) - df = pd.DataFrame({"A": range(5), "B": 5}) def f(): with np.errstate(all="ignore"): df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis) - @pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) - def test_transform_method_name(self, method): - # GH 19760 - df = pd.DataFrame({"A": [-1, 2]}) - result = df.transform(method) - expected = operator.methodcaller(method)(df) - tm.assert_frame_equal(result, expected) - def test_demo(self): # demonstration tests df = pd.DataFrame({"A": range(5), "B": 5}) diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py new file mode 100644 index 0000000000000..3a345215482ed --- /dev/null +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -0,0 +1,72 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.frame.common import zip_frames + + +def test_agg_transform(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + + with np.errstate(all="ignore"): + + f_abs = np.abs(float_frame) + f_sqrt = np.sqrt(float_frame) + + # ufunc + result = float_frame.transform(np.sqrt, axis=axis) + expected = f_sqrt.copy() + tm.assert_frame_equal(result, expected) + + result = float_frame.transform(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + # list-like + expected = f_sqrt.copy() + if axis in {0, "index"}: + expected.columns = pd.MultiIndex.from_product( + 
[float_frame.columns, ["sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product([float_frame.index, ["sqrt"]]) + result = float_frame.transform([np.sqrt], axis=axis) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = pd.MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) + result = float_frame.transform([np.abs, "sqrt"], axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_transform_and_agg_err(axis, float_frame): + # cannot both transform and agg + msg = "transforms cannot produce aggregated results" + with pytest.raises(ValueError, match=msg): + float_frame.transform(["max", "min"], axis=axis) + + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + float_frame.transform(["max", "sqrt"], axis=axis) + + +@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) +def test_transform_method_name(method): + # GH 19760 + df = pd.DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 463a140972ab5..73e60ff389038 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,3 +1,8 @@ +from typing import List + +from pandas import DataFrame, concat + + def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = dict(A="float32", B="float32", C="float16", D="float64") @@ -29,3 +34,22 @@ def _check_mixed_int(df, dtype=None): assert df.dtypes["C"] == dtypes["C"] if dtypes.get("D"): assert df.dtypes["D"] == dtypes["D"] + + +def zip_frames(frames: List[DataFrame], axis: int = 1) -> DataFrame: + """ + take a list of frames, zip them together under the + assumption that these all have the first frames' index/columns. 
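(A quick sketch of what this helper produces, with invented frame values; under ``axis=1`` the input frames' columns are interleaved column by column:)

>>> from pandas import DataFrame
>>> df = DataFrame({"a": [1, 4], "b": [2, 5]})
>>> zip_frames([df, df ** 2], axis=1).columns.tolist()
['a', 'a', 'b', 'b']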
+ + Returns + ------- + new_frame : DataFrame + """ + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return DataFrame(zipped) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 308398642895c..b948317f32062 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -209,25 +209,16 @@ def test_transform(self, string_series): f_abs = np.abs(string_series) # ufunc - result = string_series.transform(np.sqrt) expected = f_sqrt.copy() - tm.assert_series_equal(result, expected) - result = string_series.apply(np.sqrt) tm.assert_series_equal(result, expected) # list-like - result = string_series.transform([np.sqrt]) + result = string_series.apply([np.sqrt]) expected = f_sqrt.to_frame().copy() expected.columns = ["sqrt"] tm.assert_frame_equal(result, expected) - result = string_series.transform([np.sqrt]) - tm.assert_frame_equal(result, expected) - - result = string_series.transform(["sqrt"]) - tm.assert_frame_equal(result, expected) - # multiple items in list # these are in the order as if we are applying both functions per # series and then concatting @@ -236,10 +227,6 @@ def test_transform(self, string_series): result = string_series.apply([np.sqrt, np.abs]) tm.assert_frame_equal(result, expected) - result = string_series.transform(["sqrt", "abs"]) - expected.columns = ["sqrt", "abs"] - tm.assert_frame_equal(result, expected) - # dict, provide renaming expected = pd.concat([f_sqrt, f_abs], axis=1) expected.columns = ["foo", "bar"] @@ -250,19 +237,11 @@ def test_transform(self, string_series): def test_transform_and_agg_error(self, string_series): # we are trying to transform with an aggregator - msg = "transforms cannot produce aggregated results" - with pytest.raises(ValueError, match=msg): - string_series.transform(["min", "max"]) - msg = "cannot combine transform and aggregation" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): string_series.agg(["sqrt", "max"]) - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - string_series.transform(["sqrt", "max"]) - msg = "cannot perform both aggregation and transformation" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): @@ -463,14 +442,6 @@ def test_agg_cython_table_raises(self, series, func, expected): # e.g. 
Series('a b'.split()).cumprod() will raise series.agg(func) - def test_transform_none_to_type(self): - # GH34377 - df = pd.DataFrame({"a": [None]}) - - msg = "DataFrame constructor called with incompatible data and dtype" - with pytest.raises(TypeError, match=msg): - df.transform({"a": int}) - class TestSeriesMap: def test_map(self, datetime_series): diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py new file mode 100644 index 0000000000000..8bc3d2dc4d0db --- /dev/null +++ b/pandas/tests/series/apply/test_series_transform.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_transform(string_series): + # transforming functions + + with np.errstate(all="ignore"): + f_sqrt = np.sqrt(string_series) + f_abs = np.abs(string_series) + + # ufunc + result = string_series.transform(np.sqrt) + expected = f_sqrt.copy() + tm.assert_series_equal(result, expected) + + # list-like + result = string_series.transform([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ["sqrt"] + tm.assert_frame_equal(result, expected) + + result = string_series.transform([np.sqrt]) + tm.assert_frame_equal(result, expected) + + result = string_series.transform(["sqrt"]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = pd.concat([f_sqrt, f_abs], axis=1) + result = string_series.transform(["sqrt", "abs"]) + expected.columns = ["sqrt", "abs"] + tm.assert_frame_equal(result, expected) + + +def test_transform_and_agg_error(string_series): + # we are trying to transform with an aggregator + msg = "transforms cannot produce aggregated results" + with pytest.raises(ValueError, match=msg): + string_series.transform(["min", "max"]) + + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.transform(["sqrt", "max"]) + + +def test_transform_none_to_type(): + # GH34377 + df = pd.DataFrame({"a": [None]}) + + msg = "DataFrame constructor called with incompatible data and dtype" + with pytest.raises(TypeError, match=msg): + df.transform({"a": int}) From c2a0eac713ef21244a6c4c1846bfad863a96bb9b Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sun, 6 Sep 2020 13:24:03 -0400 Subject: [PATCH 43/71] CLN: _wrap_applied_output (#36160) --- pandas/core/groupby/generic.py | 191 ++++++++++++++++----------------- 1 file changed, 91 insertions(+), 100 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 728e1ee4653fd..f428085cf441a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1229,113 +1229,104 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): return self.obj._constructor() elif isinstance(first_not_none, DataFrame): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) - else: - key_index = self.grouper.result_index if self.as_index else None - - if isinstance(first_not_none, Series): - # this is to silence a DeprecationWarning - # TODO: Remove when default dtype of empty Series is object - kwargs = first_not_none._construct_axes_dict() - backup = create_series_with_explicit_dtype( - dtype_if_empty=object, **kwargs - ) - - values = [x if (x is not None) else backup for x in values] - v = 
values[0] - - if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index: - # values are not series or array-like but scalars - # self._selection_name not passed through to Series as the - # result should not take the name of original selection - # of columns - return self.obj._constructor_sliced(values, index=key_index) + key_index = self.grouper.result_index if self.as_index else None + + if isinstance(first_not_none, Series): + # this is to silence a DeprecationWarning + # TODO: Remove when default dtype of empty Series is object + kwargs = first_not_none._construct_axes_dict() + backup = create_series_with_explicit_dtype(dtype_if_empty=object, **kwargs) + + values = [x if (x is not None) else backup for x in values] + + v = values[0] + + if not isinstance(v, (np.ndarray, Index, Series)) and self.as_index: + # values are not series or array-like but scalars + # self._selection_name not passed through to Series as the + # result should not take the name of original selection + # of columns + return self.obj._constructor_sliced(values, index=key_index) + + if isinstance(v, Series): + all_indexed_same = all_indexes_same((x.index for x in values)) + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + applied_index = self._selected_obj._get_axis(self.axis) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + # assign the name to this series + if singular_series: + values[0].name = keys[0] + + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects( + keys, values, not_indexed_same=not_indexed_same + ) + # still a series + # path added as of GH 5545 + elif all_indexed_same: + from pandas.core.reshape.concat import concat + + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects(keys, values, not_indexed_same=True) + + # Combine values + # vstack+constructor is faster than concat and handles MI-columns + stacked_values = np.vstack([np.asarray(v) for v in values]) + + if self.axis == 0: + index = key_index + columns = v.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = list(names)[0] else: - if isinstance(v, Series): - all_indexed_same = all_indexes_same((x.index for x in values)) - - # GH3596 - # provide a reduction (Frame -> Series) if groups are - # unique - if self.squeeze: - applied_index = self._selected_obj._get_axis(self.axis) - singular_series = ( - len(values) == 1 and applied_index.nlevels == 1 - ) - - # assign the name to this series - if singular_series: - values[0].name = keys[0] - - # GH2893 - # we have series in the values array, we want to - # produce a series: - # if any of the sub-series are not indexed the same - # OR we don't have a multi-index and we have only a - # single values - return self._concat_objects( - keys, values, not_indexed_same=not_indexed_same - ) - - # still a series - # path added as of GH 5545 - elif all_indexed_same: - from pandas.core.reshape.concat import concat - - return concat(values) - - if not all_indexed_same: - # GH 8467 - return self._concat_objects(keys, values, not_indexed_same=True) - - # Combine values - # vstack+constructor is faster than concat and handles MI-columns - stacked_values = np.vstack([np.asarray(v) for v in 
values]) - - if self.axis == 0: - index = key_index - columns = v.index.copy() - if columns.name is None: - # GH6124 - propagate name of Series when it's consistent - names = {v.name for v in values} - if len(names) == 1: - columns.name = list(names)[0] - else: - index = v.index - columns = key_index - stacked_values = stacked_values.T - - result = self.obj._constructor( - stacked_values, index=index, columns=columns - ) + index = v.index + columns = key_index + stacked_values = stacked_values.T - elif not self.as_index: - # We add grouping column below, so create a frame here - result = DataFrame( - values, index=key_index, columns=[self._selection] - ) - else: - # GH#1738: values is list of arrays of unequal lengths - # fall through to the outer else clause - # TODO: sure this is right? we used to do this - # after raising AttributeError above - return self.obj._constructor_sliced( - values, index=key_index, name=self._selection_name - ) + result = self.obj._constructor(stacked_values, index=index, columns=columns) - # if we have date/time like in the original, then coerce dates - # as we are stacking can easily have object dtypes here - so = self._selected_obj - if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): - result = _recast_datetimelike_result(result) - else: - result = result._convert(datetime=True) + elif not self.as_index: + # We add grouping column below, so create a frame here + result = DataFrame(values, index=key_index, columns=[self._selection]) + else: + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return self.obj._constructor_sliced( + values, index=key_index, name=self._selection_name + ) + + # if we have date/time like in the original, then coerce dates + # as we are stacking can easily have object dtypes here + so = self._selected_obj + if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): + result = _recast_datetimelike_result(result) + else: + result = result._convert(datetime=True) - if not self.as_index: - self._insert_inaxis_grouper_inplace(result) + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) - return self._reindex_output(result) + return self._reindex_output(result) def _transform_general( self, func, *args, engine="cython", engine_kwargs=None, **kwargs From 366f63cfc25201f8fc354a9c51d03f0e974f6f32 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 6 Sep 2020 20:47:40 +0300 Subject: [PATCH 44/71] BUG: allow missing values in Index when calling Index.sort_values (#35604) --- doc/source/whatsnew/v1.2.0.rst | 3 +- pandas/conftest.py | 23 ++++++++ pandas/core/indexes/base.py | 27 ++++++++-- .../tests/indexes/interval/test_interval.py | 2 +- pandas/tests/indexes/period/test_ops.py | 16 ++++-- pandas/tests/indexes/test_common.py | 52 ++++++++++++++++++- 6 files changed, 112 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ff9e803b4990a..b4fdbf9588ffe 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -270,8 +270,9 @@ Interval Indexing ^^^^^^^^ + - Bug in :meth:`PeriodIndex.get_loc` incorrectly raising ``ValueError`` on non-datelike strings instead of ``KeyError``, causing similar errors in :meth:`Series.__geitem__`, :meth:`Series.__contains__`, and :meth:`Series.loc.__getitem__` (:issue:`34240`) -- +- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method 
would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`) - Missing diff --git a/pandas/conftest.py b/pandas/conftest.py index 0878380d00837..5474005a63b8e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -437,6 +437,29 @@ def index(request): index_fixture2 = index +@pytest.fixture(params=indices_dict.keys()) +def index_with_missing(request): + """ + Fixture for indices with missing values + """ + if request.param in ["int", "uint", "range", "empty", "repeats"]: + pytest.xfail("missing values not supported") + # GH 35538. Use deep copy to avoid illusive bug on np-dev + # Azure pipeline that writes into indices_dict despite copy + ind = indices_dict[request.param].copy(deep=True) + vals = ind.values + if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]: + # For setting missing values in the top level of MultiIndex + vals = ind.tolist() + vals[0] = tuple([None]) + vals[0][1:] + vals[-1] = tuple([None]) + vals[-1][1:] + return MultiIndex.from_tuples(vals) + else: + vals[0] = None + vals[-1] = None + return type(ind)(vals) + + # ---------------------------------------------------------------- # Series' # ---------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 65b5dfb6df911..a1bc8a4659b24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -88,7 +88,7 @@ import pandas.core.missing as missing from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op -from pandas.core.sorting import ensure_key_mapped +from pandas.core.sorting import ensure_key_mapped, nargsort from pandas.core.strings import StringMethods from pandas.io.formats.printing import ( @@ -4443,7 +4443,11 @@ def asof_locs(self, where, mask): return result def sort_values( - self, return_indexer=False, ascending=True, key: Optional[Callable] = None + self, + return_indexer=False, + ascending=True, + na_position: str_t = "last", + key: Optional[Callable] = None, ): """ Return a sorted copy of the index. @@ -4457,6 +4461,12 @@ def sort_values( Should the indices that would sort the index be returned. ascending : bool, default True Should the index values be sorted in an ascending order. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + + .. versionadded:: 1.2.0 + key : callable, optional If not None, apply the key function to the index values before sorting. This is similar to the `key` argument in the @@ -4497,9 +4507,16 @@ def sort_values( """ idx = ensure_key_mapped(self, key) - _as = idx.argsort() - if not ascending: - _as = _as[::-1] + # GH 35584. 
Sort missing values according to na_position kwarg + # ignore na_position for MutiIndex + if not isinstance(self, ABCMultiIndex): + _as = nargsort( + items=idx, ascending=ascending, na_position=na_position, key=key + ) + else: + _as = idx.argsort() + if not ascending: + _as = _as[::-1] sorted_index = self.take(_as) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index a20e542b1edd7..42849e0bbb5c7 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -618,7 +618,7 @@ def test_sort_values(self, closed): expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) - result = index.sort_values(ascending=False) + result = index.sort_values(ascending=False, na_position="first") expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index e7dd76584d780..d1b34c315b682 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -174,9 +174,6 @@ def _check_freq(index, expected_index): ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) _check_freq(ordered, idx) pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") @@ -333,3 +330,16 @@ def test_freq_setter_deprecated(self): # warning for setter with pytest.raises(AttributeError, match="can't set attribute"): idx.freq = pd.offsets.Day() + + +@pytest.mark.xfail(reason="Datetime-like sort_values currently unstable (GH 35922)") +def test_order_stability_compat(): + # GH 35584. The new implementation of sort_values for Index.sort_values + # is stable when sorting in descending order. Datetime-like sort_values + # currently aren't stable. 
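(Usage sketch of the new ``na_position`` keyword with invented values; the reprs shown assume the ``Float64Index`` display of pandas 1.x:)

>>> import numpy as np
>>> import pandas as pd
>>> idx = pd.Index([1.0, np.nan, 3.0, 2.0])
>>> idx.sort_values()  # missing values are pushed to the end by default
Float64Index([1.0, 2.0, 3.0, nan], dtype='float64')
>>> idx.sort_values(na_position="first")
Float64Index([nan, 1.0, 2.0, 3.0], dtype='float64')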
xfail should be removed after + # the implementations' behavior is synchronized (xref GH 35922) + pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) + ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) + tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index db260b71e7186..aa6b395176b06 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -13,7 +13,14 @@ from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion import pandas as pd -from pandas import CategoricalIndex, MultiIndex, RangeIndex +from pandas import ( + CategoricalIndex, + DatetimeIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, +) import pandas._testing as tm @@ -391,3 +398,46 @@ def test_astype_preserves_name(self, index, dtype): assert result.names == index.names else: assert result.name == index.name + + +@pytest.mark.parametrize("na_position", [None, "middle"]) +def test_sort_values_invalid_na_position(index_with_missing, na_position): + if isinstance(index_with_missing, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # datetime-like indices will get na_position kwarg as part of + # synchronizing duplicate-sorting behavior, because we currently expect + # them, other indices, and Series to sort differently (xref 35922) + pytest.xfail("sort_values does not support na_position kwarg") + elif isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): + pytest.xfail("missing value sorting order not defined for index type") + + if na_position not in ["first", "last"]: + with pytest.raises( + ValueError, match=f"invalid na_position: {na_position}", + ): + index_with_missing.sort_values(na_position=na_position) + + +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_sort_values_with_missing(index_with_missing, na_position): + # GH 35584. 
Test that sort_values works with missing values, + # sort non-missing and place missing according to na_position + + if isinstance(index_with_missing, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # datetime-like indices will get na_position kwarg as part of + # synchronizing duplicate-sorting behavior, because we currently expect + # them, other indices, and Series to sort differently (xref 35922) + pytest.xfail("sort_values does not support na_position kwarg") + elif isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): + pytest.xfail("missing value sorting order not defined for index type") + + missing_count = np.sum(index_with_missing.isna()) + not_na_vals = index_with_missing[index_with_missing.notna()].values + sorted_values = np.sort(not_na_vals) + if na_position == "first": + sorted_values = np.concatenate([[None] * missing_count, sorted_values]) + else: + sorted_values = np.concatenate([sorted_values, [None] * missing_count]) + expected = type(index_with_missing)(sorted_values) + + result = index_with_missing.sort_values(na_position=na_position) + tm.assert_index_equal(result, expected) From 8631f2e5e5c0c5d95e3381702ffbd34b282df8a2 Mon Sep 17 00:00:00 2001 From: Honfung Wong Date: Mon, 7 Sep 2020 01:49:26 +0800 Subject: [PATCH 45/71] BUG: extra leading space in to_string when index=False (#36094) --- doc/source/whatsnew/v1.2.0.rst | 5 ++- pandas/io/formats/format.py | 28 +++++++++++----- pandas/tests/io/formats/test_format.py | 42 +++++++++++++++++++++--- pandas/tests/io/formats/test_to_latex.py | 22 ++++++------- 4 files changed, 71 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b4fdbf9588ffe..9a778acba4764 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -214,8 +214,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) -- Categorical ^^^^^^^^^^^ @@ -257,7 +255,7 @@ Conversion Strings ^^^^^^^ - +- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`) - - @@ -315,6 +313,7 @@ Groupby/resample/rolling - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. 
(:issue:`9959`) - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) +- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Reshaping diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 53b2b533215f0..70e38c3106bdb 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, + leading_space=self.index, ) def to_string(self) -> str: @@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space.get(frame.columns[i]), decimal=self.decimal, + leading_space=self.index, ) def to_html( @@ -1111,7 +1113,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, quoting: Optional[int] = None, ) -> List[str]: """ @@ -1127,7 +1129,7 @@ def format_array( space justify decimal - leading_space : bool, optional + leading_space : bool, optional, default True Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. @@ -1194,7 +1196,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Optional[bool] = None, + leading_space: Optional[bool] = True, ): self.values = values self.digits = digits @@ -1395,9 +1397,11 @@ def format_values_with(float_format): float_format: Optional[FloatFormatType] if self.float_format is None: if self.fixed_width: - float_format = partial( - "{value: .{digits:d}f}".format, digits=self.digits - ) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format else: @@ -1429,7 +1433,11 @@ def format_values_with(float_format): ).any() if has_small_values or (too_long and has_large_values): - float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + if self.leading_space is True: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: f"{x: d}") + if self.leading_space is False: + formatter_str = lambda x: f"{x:d}".format(x=x) + else: + formatter_str = lambda x: f"{x: d}".format(x=x) + formatter = self.formatter or formatter_str fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1fb957505987f..f00fa6274fca2 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1546,11 +1546,11 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. 
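(With this fix, ``index=False`` output no longer carries the leading pad column; a sketch with an invented frame, padding assumed from the updated expectations in this hunk:)

>>> import pandas as pd
>>> print(pd.DataFrame({"x": [11, 22]}).to_string(index=False))
 x
11
22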
- expected = " x y z\n 11 33 AAA\n 22 -44 " + expected = " x y z\n11 33 AAA\n22 -44 " assert df_s == expected df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " + expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -2269,7 +2269,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = " 1\n" + " 2\n" + " 3\n" + " 4" + expected = "1\n" + "2\n" + "3\n" + "4" assert result == expected def test_unicode_name_in_footer(self): @@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): getattr(float_frame, method)(buf=object()) + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 96a9ed2b86cf4..9dfd851e91c65 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -413,7 +413,7 @@ def test_to_latex_longtable(self): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -423,8 +423,8 @@ def test_to_latex_longtable(self): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -663,8 +663,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -690,10 +690,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ From 66b3b5aeb0becc0c0d5657f2980e5454b1d59db5 Mon Sep 17 00:00:00 2001 From: Harsh Sharma <51477130+hs2361@users.noreply.github.com> Date: Mon, 7 Sep 2020 
16:46:11 +0530 Subject: [PATCH 46/71] BUG: shows correct package name when import_optional_dependency is ca… (#36134) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/compat/_optional.py | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 1e946d325ace1..da261907565a1 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -41,6 +41,7 @@ Bug fixes - Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`) - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`) - Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`) +- Bug in :meth:`import_optional_dependency` returning incorrect package names in cases where package name is different from import name (:issue:`35948`) .. --------------------------------------------------------------------------- diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 689c7c889ef66..40688a3978cfc 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -33,6 +33,19 @@ "numba": "0.46.0", } +# A mapping from import name to package name (on PyPI) for packages where +# these two names are different. + +INSTALL_MAPPING = { + "bs4": "beautifulsoup4", + "bottleneck": "Bottleneck", + "lxml.etree": "lxml", + "odf": "odfpy", + "pandas_gbq": "pandas-gbq", + "sqlalchemy": "SQLAlchemy", + "jinja2": "Jinja2", +} + def _get_version(module: types.ModuleType) -> str: version = getattr(module, "__version__", None) @@ -82,9 +95,13 @@ def import_optional_dependency( is False, or when the package's version is too old and `on_version` is ``'warn'``. """ + + package_name = INSTALL_MAPPING.get(name) + install_name = package_name if package_name is not None else name + msg = ( f"Missing optional dependency '{install_name}'. {extra} " f"Use pip or conda to install {install_name}."
) try: module = importlib.import_module(name) From 4550cf1de59bcc0bf7f5e00c1d08ea2bbe15210e Mon Sep 17 00:00:00 2001 From: ivanovmg <41443370+ivanovmg@users.noreply.github.com> Date: Tue, 8 Sep 2020 01:58:50 +0700 Subject: [PATCH 47/71] REF: simplify latex formatting (#35872) --- pandas/io/formats/format.py | 7 +- pandas/io/formats/latex.py | 778 +++++++++++++++++------ pandas/tests/io/formats/test_to_latex.py | 102 +++ 3 files changed, 694 insertions(+), 193 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 70e38c3106bdb..623dc6e6bad91 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -939,17 +939,18 @@ def to_latex( """ from pandas.io.formats.latex import LatexFormatter - return LatexFormatter( + latex_formatter = LatexFormatter( self, - column_format=column_format, longtable=longtable, + column_format=column_format, multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, caption=caption, label=label, position=position, - ).get_result(buf=buf, encoding=encoding) + ) + return latex_formatter.get_result(buf=buf, encoding=encoding) def _format_col(self, i: int) -> List[str]: frame = self.tr_frame diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 715b8bbdf5672..8080d953da308 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -1,7 +1,8 @@ """ Module for formatting output data in Latex. """ -from typing import IO, List, Optional, Tuple +from abc import ABC, abstractmethod +from typing import IO, Iterator, List, Optional, Type import numpy as np @@ -10,56 +11,95 @@ from pandas.io.formats.format import DataFrameFormatter, TableFormatter -class LatexFormatter(TableFormatter): - """ - Used to render a DataFrame to a LaTeX tabular/longtable environment output. +class RowStringConverter(ABC): + r"""Converter for dataframe rows into LaTeX strings. Parameters ---------- formatter : `DataFrameFormatter` - column_format : str, default None - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' for 3 columns - longtable : boolean, default False - Use a longtable environment instead of tabular. + Instance of `DataFrameFormatter`. + multicolumn: bool, optional + Whether to use \multicolumn macro. + multicolumn_format: str, optional + Multicolumn format. + multirow: bool, optional + Whether to use \multirow macro. 
- See Also - -------- - HTMLFormatter """ def __init__( self, formatter: DataFrameFormatter, - column_format: Optional[str] = None, - longtable: bool = False, multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, - caption: Optional[str] = None, - label: Optional[str] = None, - position: Optional[str] = None, ): self.fmt = formatter self.frame = self.fmt.frame - self.bold_rows = self.fmt.bold_rows - self.column_format = column_format - self.longtable = longtable self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow - self.caption = caption - self.label = label - self.escape = self.fmt.escape - self.position = position - self._table_float = any(p is not None for p in (caption, label, position)) + self.clinebuf: List[List[int]] = [] + self.strcols = self._get_strcols() + self.strrows: List[List[str]] = ( + list(zip(*self.strcols)) # type: ignore[arg-type] + ) + + def get_strrow(self, row_num: int) -> str: + """Get string representation of the row.""" + row = self.strrows[row_num] + + is_multicol = ( + row_num < self.column_levels and self.fmt.header and self.multicolumn + ) + + is_multirow = ( + row_num >= self.header_levels + and self.fmt.index + and self.multirow + and self.index_levels > 1 + ) + + is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1 + + crow = self._preprocess_row(row) + + if is_multicol: + crow = self._format_multicolumn(crow) + if is_multirow: + crow = self._format_multirow(crow, row_num) + + lst = [] + lst.append(" & ".join(crow)) + lst.append(" \\\\") + if is_cline_maybe_required: + cline = self._compose_cline(row_num, len(self.strcols)) + lst.append(cline) + return "".join(lst) + + @property + def _header_row_num(self) -> int: + """Number of rows in header.""" + return self.header_levels if self.fmt.header else 0 + + @property + def index_levels(self) -> int: + """Integer number of levels in index.""" + return self.frame.index.nlevels + + @property + def column_levels(self) -> int: + return self.frame.columns.nlevels + + @property + def header_levels(self) -> int: + nlevels = self.column_levels + if self.fmt.has_index_names and self.fmt.show_index_names: + nlevels += 1 + return nlevels - def write_result(self, buf: IO[str]) -> None: - """ - Render a DataFrame to a LaTeX tabular, longtable, or table/tabular - environment output. 
- """ - # string representation of the columns + def _get_strcols(self) -> List[List[str]]: + """String representation of the columns.""" if len(self.frame.columns) == 0 or len(self.frame.index) == 0: info_line = ( f"Empty {type(self.frame).__name__}\n" @@ -70,12 +110,6 @@ def write_result(self, buf: IO[str]) -> None: else: strcols = self.fmt._to_str_columns() - def get_col_type(dtype): - if issubclass(dtype.type, np.number): - return "r" - else: - return "l" - # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): out = self.frame.index.format( @@ -107,89 +141,19 @@ def pad_empties(x): # Get rid of old multiindex column and add new ones strcols = out + strcols[1:] + return strcols - if self.column_format is None: - dtypes = self.frame.dtypes._values - column_format = "".join(map(get_col_type, dtypes)) - if self.fmt.index: - index_format = "l" * self.frame.index.nlevels - column_format = index_format + column_format - elif not isinstance(self.column_format, str): # pragma: no cover - raise AssertionError( - f"column_format must be str or unicode, not {type(column_format)}" - ) + def _preprocess_row(self, row: List[str]) -> List[str]: + """Preprocess elements of the row.""" + if self.fmt.escape: + crow = _escape_symbols(row) else: - column_format = self.column_format - - self._write_tabular_begin(buf, column_format) - - buf.write("\\toprule\n") + crow = [x if x else "{}" for x in row] + if self.fmt.bold_rows and self.fmt.index: + crow = _convert_to_bold(crow, self.index_levels) + return crow - ilevels = self.frame.index.nlevels - clevels = self.frame.columns.nlevels - nlevels = clevels - if self.fmt.has_index_names and self.fmt.show_index_names: - nlevels += 1 - strrows = list(zip(*strcols)) - self.clinebuf: List[List[int]] = [] - - for i, row in enumerate(strrows): - if i == nlevels and self.fmt.header: - buf.write("\\midrule\n") # End of header - if self.longtable: - buf.write("\\endhead\n") - buf.write("\\midrule\n") - buf.write( - f"\\multicolumn{{{len(row)}}}{{r}}" - "{{Continued on next page}} \\\\\n" - ) - buf.write("\\midrule\n") - buf.write("\\endfoot\n\n") - buf.write("\\bottomrule\n") - buf.write("\\endlastfoot\n") - if self.escape: - # escape backslashes first - crow = [ - ( - x.replace("\\", "\\textbackslash ") - .replace("_", "\\_") - .replace("%", "\\%") - .replace("$", "\\$") - .replace("#", "\\#") - .replace("{", "\\{") - .replace("}", "\\}") - .replace("~", "\\textasciitilde ") - .replace("^", "\\textasciicircum ") - .replace("&", "\\&") - if (x and x != "{}") - else "{}" - ) - for x in row - ] - else: - crow = [x if x else "{}" for x in row] - if self.bold_rows and self.fmt.index: - # bold row labels - crow = [ - f"\\textbf{{{x}}}" - if j < ilevels and x.strip() not in ["", "{}"] - else x - for j, x in enumerate(crow) - ] - if i < clevels and self.fmt.header and self.multicolumn: - # sum up columns to multicolumns - crow = self._format_multicolumn(crow, ilevels) - if i >= nlevels and self.fmt.index and self.multirow and ilevels > 1: - # sum up rows to multirows - crow = self._format_multirow(crow, ilevels, i, strrows) - buf.write(" & ".join(crow)) - buf.write(" \\\\\n") - if self.multirow and i < len(strrows) - 1: - self._print_cline(buf, i, len(strcols)) - - self._write_tabular_end(buf) - - def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: + def _format_multicolumn(self, row: List[str]) -> List[str]: r""" Combine columns belonging to a group to a single multicolumn 
entry according to self.multicolumn_format @@ -199,7 +163,7 @@ def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: will become \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} """ - row2 = list(row[:ilevels]) + row2 = row[: self.index_levels] ncol = 1 coltext = "" @@ -214,7 +178,7 @@ def append_col(): else: row2.append(coltext) - for c in row[ilevels:]: + for c in row[self.index_levels :]: # if next col has text, write the previous if c.strip(): if coltext: @@ -229,9 +193,7 @@ def append_col(): append_col() return row2 - def _format_multirow( - self, row: List[str], ilevels: int, i: int, rows: List[Tuple[str, ...]] - ) -> List[str]: + def _format_multirow(self, row: List[str], i: int) -> List[str]: r""" Check following rows, whether row should be a multirow @@ -241,10 +203,10 @@ def _format_multirow( b & 0 & \cline{1-2} b & 0 & """ - for j in range(ilevels): + for j in range(self.index_levels): if row[j].strip(): nrow = 1 - for r in rows[i + 1 :]: + for r in self.strrows[i + 1 :]: if not r[j].strip(): nrow += 1 else: @@ -256,88 +218,524 @@ def _format_multirow( self.clinebuf.append([i + nrow - 1, j + 1]) return row - def _print_cline(self, buf: IO[str], i: int, icol: int) -> None: + def _compose_cline(self, i: int, icol: int) -> str: """ - Print clines after multirow-blocks are finished. + Create clines after multirow-blocks are finished. """ + lst = [] for cl in self.clinebuf: if cl[0] == i: - buf.write(f"\\cline{{{cl[1]:d}-{icol:d}}}\n") - # remove entries that have been written to buffer - self.clinebuf = [x for x in self.clinebuf if x[0] != i] + lst.append(f"\n\\cline{{{cl[1]:d}-{icol:d}}}") + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + return "".join(lst) + + +class RowStringIterator(RowStringConverter): + """Iterator over rows of the header or the body of the table.""" + + @abstractmethod + def __iter__(self) -> Iterator[str]: + """Iterate over LaTeX string representations of rows.""" + + +class RowHeaderIterator(RowStringIterator): + """Iterator for the table header rows.""" + + def __iter__(self) -> Iterator[str]: + for row_num in range(len(self.strrows)): + if row_num < self._header_row_num: + yield self.get_strrow(row_num) + + +class RowBodyIterator(RowStringIterator): + """Iterator for the table body rows.""" + + def __iter__(self) -> Iterator[str]: + for row_num in range(len(self.strrows)): + if row_num >= self._header_row_num: + yield self.get_strrow(row_num) - def _write_tabular_begin(self, buf, column_format: str): - """ - Write the beginning of a tabular environment or - nested table/tabular environments including caption and label. + +class TableBuilderAbstract(ABC): + """ + Abstract table builder producing string representation of LaTeX table. + + Parameters + ---------- + formatter : `DataFrameFormatter` + Instance of `DataFrameFormatter`. + column_format: str, optional + Column format, for example, 'rcl' for three columns. + multicolumn: bool, optional + Use multicolumn to enhance MultiIndex columns. + multicolumn_format: str, optional + The alignment for multicolumns, similar to column_format. + multirow: bool, optional + Use multirow to enhance MultiIndex rows. + caption: str, optional + Table caption. + label: str, optional + LaTeX label. + position: str, optional + Float placement specifier, for example, 'htb'. 
+ """ + + def __init__( + self, + formatter: DataFrameFormatter, + column_format: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + position: Optional[str] = None, + ): + self.fmt = formatter + self.column_format = column_format + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.position = position + + def get_result(self) -> str: + """String representation of LaTeX table.""" + elements = [ + self.env_begin, + self.top_separator, + self.header, + self.middle_separator, + self.env_body, + self.bottom_separator, + self.env_end, + ] + result = "\n".join([item for item in elements if item]) + trailing_newline = "\n" + result += trailing_newline + return result + + @property + @abstractmethod + def env_begin(self) -> str: + """Beginning of the environment.""" + + @property + @abstractmethod + def top_separator(self) -> str: + """Top level separator.""" + + @property + @abstractmethod + def header(self) -> str: + """Header lines.""" + + @property + @abstractmethod + def middle_separator(self) -> str: + """Middle level separator.""" + + @property + @abstractmethod + def env_body(self) -> str: + """Environment body.""" + + @property + @abstractmethod + def bottom_separator(self) -> str: + """Bottom level separator.""" + + @property + @abstractmethod + def env_end(self) -> str: + """End of the environment.""" + + +class GenericTableBuilder(TableBuilderAbstract): + """Table builder producing string representation of LaTeX table.""" + + @property + def header(self) -> str: + iterator = self._create_row_iterator(over="header") + return "\n".join(list(iterator)) + + @property + def top_separator(self) -> str: + return "\\toprule" + + @property + def middle_separator(self) -> str: + return "\\midrule" if self._is_separator_required() else "" + + @property + def env_body(self) -> str: + iterator = self._create_row_iterator(over="body") + return "\n".join(list(iterator)) + + def _is_separator_required(self) -> bool: + return bool(self.header and self.env_body) + + @property + def _position_macro(self) -> str: + r"""Position macro, extracted from self.position, like [h].""" + return f"[{self.position}]" if self.position else "" + + @property + def _caption_macro(self) -> str: + r"""Caption macro, extracted from self.caption, like \caption{cap}.""" + return f"\\caption{{{self.caption}}}" if self.caption else "" + + @property + def _label_macro(self) -> str: + r"""Label macro, extracted from self.label, like \label{ref}.""" + return f"\\label{{{self.label}}}" if self.label else "" + + def _create_row_iterator(self, over: str) -> RowStringIterator: + """Create iterator over header or body of the table. Parameters ---------- - buf : string or file handle - File path or object. If not specified, the result is returned as - a string. - column_format : str - The columns format as specified in `LaTeX table format - `__ e.g 'rcl' - for 3 columns + over : {'body', 'header'} + Over what to iterate. + + Returns + ------- + RowStringIterator + Iterator over body or header. 
""" - if self._table_float: - # then write output in a nested table/tabular or longtable environment - if self.caption is None: - caption_ = "" - else: - caption_ = f"\n\\caption{{{self.caption}}}" + iterator_kind = self._select_iterator(over) + return iterator_kind( + formatter=self.fmt, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + ) + + def _select_iterator(self, over: str) -> Type[RowStringIterator]: + """Select proper iterator over table rows.""" + if over == "header": + return RowHeaderIterator + elif over == "body": + return RowBodyIterator + else: + msg = f"'over' must be either 'header' or 'body', but {over} was provided" + raise ValueError(msg) + + +class LongTableBuilder(GenericTableBuilder): + """Concrete table builder for longtable. + + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = LongTableBuilder(formatter, caption='caption', label='lab', + ... column_format='lrl') + >>> table = builder.get_result() + >>> print(table) + \\begin{longtable}{lrl} + \\caption{caption} + \\label{lab}\\\\ + \\toprule + {} & a & b \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{3}{r}{{Continued on next page}} \\\\ + \\midrule + \\endfoot + + \\bottomrule + \\endlastfoot + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\end{longtable} + + """ - if self.label is None: - label_ = "" - else: - label_ = f"\n\\label{{{self.label}}}" + @property + def env_begin(self) -> str: + first_row = ( + f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}" + ) + elements = [first_row, f"{self._caption_and_label()}"] + return "\n".join([item for item in elements if item]) + + def _caption_and_label(self) -> str: + if self.caption or self.label: + double_backslash = "\\\\" + elements = [f"{self._caption_macro}", f"{self._label_macro}"] + caption_and_label = "\n".join([item for item in elements if item]) + caption_and_label += double_backslash + return caption_and_label + else: + return "" + + @property + def middle_separator(self) -> str: + iterator = self._create_row_iterator(over="header") + elements = [ + "\\midrule", + "\\endhead", + "\\midrule", + f"\\multicolumn{{{len(iterator.strcols)}}}{{r}}" + "{{Continued on next page}} \\\\", + "\\midrule", + "\\endfoot\n", + "\\bottomrule", + "\\endlastfoot", + ] + if self._is_separator_required(): + return "\n".join(elements) + return "" + + @property + def bottom_separator(self) -> str: + return "" + + @property + def env_end(self) -> str: + return "\\end{longtable}" + + +class RegularTableBuilder(GenericTableBuilder): + """Concrete table builder for regular table. + + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = RegularTableBuilder(formatter, caption='caption', label='lab', + ... 
column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{table} + \\centering + \\caption{caption} + \\label{lab} + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + \\end{table} + + """ - if self.position is None: - position_ = "" - else: - position_ = f"[{self.position}]" + @property + def env_begin(self) -> str: + elements = [ + f"\\begin{{table}}{self._position_macro}", + "\\centering", + f"{self._caption_macro}", + f"{self._label_macro}", + f"\\begin{{tabular}}{{{self.column_format}}}", + ] + return "\n".join([item for item in elements if item]) + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\n".join(["\\end{tabular}", "\\end{table}"]) + + +class TabularBuilder(GenericTableBuilder): + """Concrete table builder for tabular environment. + + >>> from pandas import DataFrame + >>> from pandas.io.formats import format as fmt + >>> df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + >>> formatter = fmt.DataFrameFormatter(df) + >>> builder = TabularBuilder(formatter, column_format='lrc') + >>> table = builder.get_result() + >>> print(table) + \\begin{tabular}{lrc} + \\toprule + {} & a & b \\\\ + \\midrule + 0 & 1 & b1 \\\\ + 1 & 2 & b2 \\\\ + \\bottomrule + \\end{tabular} + + """ - if self.longtable: - table_ = f"\\begin{{longtable}}{position_}{{{column_format}}}" - tabular_ = "\n" - else: - table_ = f"\\begin{{table}}{position_}\n\\centering" - tabular_ = f"\n\\begin{{tabular}}{{{column_format}}}\n" - - if self.longtable and (self.caption is not None or self.label is not None): - # a double-backslash is required at the end of the line - # as discussed here: - # https://tex.stackexchange.com/questions/219138 - backlash_ = "\\\\" - else: - backlash_ = "" - buf.write(f"{table_}{caption_}{label_}{backlash_}{tabular_}") - else: - if self.longtable: - tabletype_ = "longtable" - else: - tabletype_ = "tabular" - buf.write(f"\\begin{{{tabletype_}}}{{{column_format}}}\n") + @property + def env_begin(self) -> str: + return f"\\begin{{tabular}}{{{self.column_format}}}" + + @property + def bottom_separator(self) -> str: + return "\\bottomrule" + + @property + def env_end(self) -> str: + return "\\end{tabular}" + + +class LatexFormatter(TableFormatter): + """ + Used to render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + formatter : `DataFrameFormatter` + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns + + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + longtable: bool = False, + column_format: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + position: Optional[str] = None, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.longtable = longtable + self.column_format = column_format # type: ignore[assignment] + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.position = position - def _write_tabular_end(self, buf): + def write_result(self, buf: IO[str]) -> None: """ - Write the end of a tabular environment or nested table/tabular - environment. 
+ Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. + """ + table_string = self.builder.get_result() + buf.write(table_string) - Parameters - ---------- - buf : string or file handle - File path or object. If not specified, the result is returned as - a string. + @property + def builder(self) -> TableBuilderAbstract: + """Concrete table builder. + Returns + ------- + TableBuilder """ + builder = self._select_builder() + return builder( + formatter=self.fmt, + column_format=self.column_format, + multicolumn=self.multicolumn, + multicolumn_format=self.multicolumn_format, + multirow=self.multirow, + caption=self.caption, + label=self.label, + position=self.position, + ) + + def _select_builder(self) -> Type[TableBuilderAbstract]: + """Select proper table builder.""" if self.longtable: - buf.write("\\end{longtable}\n") + return LongTableBuilder + if any([self.caption, self.label, self.position]): + return RegularTableBuilder + return TabularBuilder + + @property + def column_format(self) -> str: + """Column format.""" + return self._column_format + + @column_format.setter + def column_format(self, input_column_format: Optional[str]) -> None: + """Setter for column format.""" + if input_column_format is None: + self._column_format = ( + self._get_index_format() + self._get_column_format_based_on_dtypes() + ) + elif not isinstance(input_column_format, str): + raise ValueError( + f"column_format must be str or unicode, " + f"not {type(input_column_format)}" + ) else: - buf.write("\\bottomrule\n") - buf.write("\\end{tabular}\n") - if self._table_float: - buf.write("\\end{table}\n") - else: - pass + self._column_format = input_column_format + + def _get_column_format_based_on_dtypes(self) -> str: + """Get column format based on data type. + + Right alignment for numbers and left - for strings. + """ + + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + return "l" + + dtypes = self.frame.dtypes._values + return "".join(map(get_col_type, dtypes)) + + def _get_index_format(self) -> str: + """Get index column format.""" + return "l" * self.frame.index.nlevels if self.fmt.index else "" + + +def _escape_symbols(row: List[str]) -> List[str]: + """Carry out string replacements for special symbols. + + Parameters + ---------- + row : list + List of string, that may contain special symbols. + + Returns + ------- + list + list of strings with the special symbols replaced. 
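+
+    Notes
+    -----
+    Empty strings and the ``{}`` placeholder are emitted as ``{}`` and are
+    never escaped.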
+ """ + return [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + + +def _convert_to_bold(crow: List[str], ilevels: int) -> List[str]: + """Convert elements in ``crow`` to bold.""" + return [ + f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x + for j, x in enumerate(crow) + ] + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 9dfd851e91c65..a98644250b328 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -7,6 +7,14 @@ from pandas import DataFrame, Series import pandas._testing as tm +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.formats.latex import ( + RegularTableBuilder, + RowBodyIterator, + RowHeaderIterator, + RowStringConverter, +) + class TestToLatex: def test_to_latex_filename(self, float_frame): @@ -60,6 +68,16 @@ def test_to_latex(self, float_frame): assert withoutindex_result == withoutindex_expected + @pytest.mark.parametrize( + "bad_column_format", + [5, 1.2, ["l", "r"], ("r", "c"), {"r", "c", "l"}, dict(a="r", b="l")], + ) + def test_to_latex_bad_column_format(self, bad_column_format): + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + msg = r"column_format must be str or unicode" + with pytest.raises(ValueError, match=msg): + df.to_latex(column_format=bad_column_format) + def test_to_latex_format(self, float_frame): # GH Bug #9402 float_frame.to_latex(column_format="ccc") @@ -930,3 +948,87 @@ def test_to_latex_multindex_header(self): \end{tabular} """ assert observed == expected + + +class TestTableBuilder: + @pytest.fixture + def dataframe(self): + return DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + @pytest.fixture + def table_builder(self, dataframe): + return RegularTableBuilder(formatter=DataFrameFormatter(dataframe)) + + def test_create_row_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="header") + assert isinstance(iterator, RowHeaderIterator) + + def test_create_body_iterator(self, table_builder): + iterator = table_builder._create_row_iterator(over="body") + assert isinstance(iterator, RowBodyIterator) + + def test_create_body_wrong_kwarg_raises(self, table_builder): + with pytest.raises(ValueError, match="must be either 'header' or 'body'"): + table_builder._create_row_iterator(over="SOMETHING BAD") + + +class TestRowStringConverter: + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design & ratio & xy \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_without_escape(self, row_num, expected): + df = DataFrame({r"Design": [1, 2, 3], r"ratio": [4, 5, 6], r"xy": [10, 11, 12]}) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & Design \# & ratio, \% & x\&y \\"), + (1, r"0 & 1 & 4 & 10 \\"), + (2, r"1 & 2 & 5 & 11 \\"), + ], + ) + def test_get_strrow_normal_with_escape(self, row_num, expected): + df = DataFrame( + {r"Design #": [1, 2, 3], r"ratio, %": [4, 5, 6], r"x&y": 
[10, 11, 12]} + ) + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df, escape=True), + ) + assert row_string_converter.get_strrow(row_num=row_num) == expected + + @pytest.mark.parametrize( + "row_num, expected", + [ + (0, r"{} & \multicolumn{2}{r}{c1} & \multicolumn{2}{r}{c2} & c3 \\"), + (1, r"{} & 0 & 1 & 0 & 1 & 0 \\"), + (2, r"0 & 0 & 5 & 0 & 5 & 0 \\"), + ], + ) + def test_get_strrow_multindex_multicolumn(self, row_num, expected): + df = DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + + row_string_converter = RowStringConverter( + formatter=DataFrameFormatter(df), + multicolumn=True, + multicolumn_format="r", + multirow=True, + ) + + assert row_string_converter.get_strrow(row_num=row_num) == expected From 7db9d22e1a52a757c95a6772319572252770b77d Mon Sep 17 00:00:00 2001 From: Jonathan Shreckengost Date: Mon, 7 Sep 2020 15:04:42 -0400 Subject: [PATCH 48/71] Comma cleanup (#36168) --- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 33 +++++++------------- pandas/tests/internals/test_internals.py | 8 ++--- pandas/tests/io/formats/test_css.py | 12 +++---- pandas/tests/io/formats/test_info.py | 12 +++---- pandas/tests/io/json/test_compression.py | 2 +- pandas/tests/io/json/test_pandas.py | 10 ++---- pandas/tests/io/parser/test_c_parser_only.py | 4 +-- pandas/tests/io/parser/test_parse_dates.py | 4 +-- pandas/tests/io/parser/test_usecols.py | 2 +- 11 files changed, 34 insertions(+), 57 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 4fae01ec710fd..bfb62835add93 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -56,7 +56,7 @@ def test_is_scalar_access(self): assert ser.iloc._is_scalar_access((1,)) df = ser.to_frame() - assert df.iloc._is_scalar_access((1, 0,)) + assert df.iloc._is_scalar_access((1, 0)) def test_iloc_exceeds_bounds(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a080c5d169215..ca8a3ddc95575 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1004,7 +1004,7 @@ def test_extension_array_cross_section(): def test_extension_array_cross_section_converts(): # all numeric columns -> numeric series df = pd.DataFrame( - {"A": pd.array([1, 2], dtype="Int64"), "B": np.array([1, 2])}, index=["a", "b"], + {"A": pd.array([1, 2], dtype="Int64"), "B": np.array([1, 2])}, index=["a", "b"] ) result = df.loc["a"] expected = pd.Series([1, 1], dtype="Int64", index=["A", "B"], name="a") diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 193800fae751f..e42d9679464d8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -29,13 +29,11 @@ def test_loc_getitem_label_out_of_range(self): # out of range label self.check_result( - "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError, + "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError ) self.check_result("loc", "f", typs=["floats"], fails=KeyError) self.check_result("loc", "f", typs=["floats"], fails=KeyError) - self.check_result( - "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, - ) + self.check_result("loc", 20, typs=["ints", "uints", "mixed"], 
fails=KeyError) self.check_result("loc", 20, typs=["labels"], fails=KeyError) self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) @@ -46,26 +44,24 @@ def test_loc_getitem_label_list(self): pass def test_loc_getitem_label_list_with_missing(self): + self.check_result("loc", [0, 1, 2], typs=["empty"], fails=KeyError) self.check_result( - "loc", [0, 1, 2], typs=["empty"], fails=KeyError, - ) - self.check_result( - "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError, + "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError ) self.check_result( - "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError, + "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError ) # GH 17758 - MultiIndex and missing keys self.check_result( - "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError, + "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError ) def test_loc_getitem_label_list_fails(self): # fails self.check_result( - "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError, + "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError ) def test_loc_getitem_label_array_like(self): @@ -95,18 +91,14 @@ def test_loc_getitem_label_slice(self): ) self.check_result( - "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError, + "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError ) - self.check_result( - "loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError, - ) - self.check_result( - "loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError, - ) + self.check_result("loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError) + self.check_result("loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError) self.check_result( - "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError, + "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError ) def test_setitem_from_duplicate_axis(self): @@ -669,8 +661,7 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): (1, ["A", "B", "C"]), np.array([7, 8, 9], dtype=np.int64), pd.DataFrame( - [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], - columns=["A", "B", "C"], + [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] ), ), ( diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 06ccdd2484a2a..1d73d1e35728b 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -892,16 +892,16 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): fill_value, ) assert_reindex_indexer_is_ok( - mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value, + mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value ) assert_reindex_indexer_is_ok( - mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value, + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value ) assert_reindex_indexer_is_ok( mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 0, 0], fill_value ) assert_reindex_indexer_is_ok( - mgr, ax, pd.Index(["foo", "bar", "baz"]), [-1, 0, -1], fill_value, + mgr, ax, pd.Index(["foo", "bar", "baz"]), [-1, 0, -1], fill_value ) assert_reindex_indexer_is_ok( mgr, @@ -913,7 +913,7 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): if mgr.shape[ax] >= 3: assert_reindex_indexer_is_ok( - mgr, ax, pd.Index(["foo", "bar", 
"baz"]), [0, 1, 2], fill_value, + mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value ) diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py index 9383f86e335fa..785904fafd31a 100644 --- a/pandas/tests/io/formats/test_css.py +++ b/pandas/tests/io/formats/test_css.py @@ -99,11 +99,11 @@ def test_css_side_shorthands(shorthand, expansions): top, right, bottom, left = expansions assert_resolves( - f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}, + f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"} ) assert_resolves( - f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}, + f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"} ) assert_resolves( @@ -189,9 +189,7 @@ def test_css_absolute_font_size(size, relative_to, resolved): inherited = None else: inherited = {"font-size": relative_to} - assert_resolves( - f"font-size: {size}", {"font-size": resolved}, inherited=inherited, - ) + assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited) @pytest.mark.parametrize( @@ -225,6 +223,4 @@ def test_css_relative_font_size(size, relative_to, resolved): inherited = None else: inherited = {"font-size": relative_to} - assert_resolves( - f"font-size: {size}", {"font-size": resolved}, inherited=inherited, - ) + assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 877bd1650ae60..7000daeb9b575 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -299,7 +299,7 @@ def test_info_memory_usage(): DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True) DataFrame(1, index=["a"], columns=["A"]).index.nbytes df = DataFrame( - data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"], + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"] ) df.index.nbytes df.memory_usage(index=True) @@ -336,7 +336,7 @@ def test_info_memory_usage_deep_pypy(): @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") def test_usage_via_getsizeof(): df = DataFrame( - data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"], + data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"] ) mem = df.memory_usage(deep=True).sum() # sys.getsizeof will call the .memory_usage with @@ -359,16 +359,14 @@ def test_info_memory_usage_qualified(): buf = StringIO() df = DataFrame( - 1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]), + 1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]) ) df.info(buf=buf) assert "+" not in buf.getvalue() buf = StringIO() df = DataFrame( - 1, - columns=list("ab"), - index=MultiIndex.from_product([range(3), ["foo", "bar"]]), + 1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]]) ) df.info(buf=buf) assert "+" in buf.getvalue() @@ -384,7 +382,7 @@ def memory_usage(f): N = 100 M = len(uppercase) index = MultiIndex.from_product( - [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], + [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"] ) df = DataFrame({"value": np.random.randn(N * M)}, index=index) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index c0e3220454bf1..a41af9886c617 100644 --- 
a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -45,7 +45,7 @@ def test_with_s3_url(compression, s3_resource, s3so): s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f) roundtripped_df = pd.read_json( - "s3://pandas-test/test-1", compression=compression, storage_options=s3so, + "s3://pandas-test/test-1", compression=compression, storage_options=s3so ) tm.assert_frame_equal(df, roundtripped_df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 59d64e1a6e909..13152f01abb04 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -745,11 +745,7 @@ def test_reconstruction_index(self): def test_path(self, float_frame, int_frame, datetime_frame): with tm.ensure_clean("test.json") as path: - for df in [ - float_frame, - int_frame, - datetime_frame, - ]: + for df in [float_frame, int_frame, datetime_frame]: df.to_json(path) read_json(path) @@ -1706,9 +1702,7 @@ def test_to_s3(self, s3_resource, s3so): # GH 28375 mock_bucket_name, target_file = "pandas-test", "test.json" df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}) - df.to_json( - f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so, - ) + df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so) timeout = 5 while True: if target_file in ( diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 50179fc1ec4b8..50d5fb3e49c2a 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -646,9 +646,7 @@ def test_1000_sep_with_decimal( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "float_precision", [None, "high", "round_trip"], -) +@pytest.mark.parametrize("float_precision", [None, "high", "round_trip"]) @pytest.mark.parametrize( "value,expected", [ diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ed947755e3419..833186b69c63b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1439,7 +1439,7 @@ def test_parse_timezone(all_parsers): end="2018-01-04 09:05:00", freq="1min", tz=pytz.FixedOffset(540), - ), + ) ), freq=None, ) @@ -1553,5 +1553,5 @@ def test_missing_parse_dates_column_raises( msg = f"Missing column provided to 'parse_dates': '{missing_cols}'" with pytest.raises(ValueError, match=msg): parser.read_csv( - content, sep=",", names=names, usecols=usecols, parse_dates=parse_dates, + content, sep=",", names=names, usecols=usecols, parse_dates=parse_dates ) diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index d4e049cc3fcc2..7e9c9866a666d 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -199,7 +199,7 @@ def test_usecols_with_whitespace(all_parsers): # Column selection by index. ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), # Column selection by name. 
- (["0", "1"], DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]),), + (["0", "1"], DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"])), ], ) def test_usecols_with_integer_like_header(all_parsers, usecols, expected): From 172c626b217f03bb3357d168e993bd2947dad31e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 7 Sep 2020 20:05:50 +0100 Subject: [PATCH 49/71] TST: test_datetime64_factorize on 32bit (#36192) --- pandas/tests/test_algos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ec7413514d430..a2c2ae22a0b62 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -256,7 +256,7 @@ def test_datetime64_factorize(self, writable): # GH35650 Verify whether read-only datetime64 array can be factorized data = np.array([np.datetime64("2020-01-01T00:00:00.000")]) data.setflags(write=writable) - expected_codes = np.array([0], dtype=np.int64) + expected_codes = np.array([0], dtype=np.intp) expected_uniques = np.array( ["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]" ) From f895c6a73361269dea2a115690510b4b9adcd0df Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 7 Sep 2020 20:39:15 +0100 Subject: [PATCH 50/71] TST: update test_series_factorize_na_sentinel_none for 32bit (#36191) --- pandas/tests/base/test_factorize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py index 9fad9856d53cc..f8cbadb987d29 100644 --- a/pandas/tests/base/test_factorize.py +++ b/pandas/tests/base/test_factorize.py @@ -34,7 +34,7 @@ def test_series_factorize_na_sentinel_none(): ser = pd.Series(values) codes, uniques = ser.factorize(na_sentinel=None) - expected_codes = np.array([0, 1, 0, 2], dtype="int64") + expected_codes = np.array([0, 1, 0, 2], dtype=np.intp) expected_uniques = pd.Index([1.0, 2.0, np.nan]) tm.assert_numpy_array_equal(codes, expected_codes) From bb5b86a6130769225aa2b5884a277398a639f25c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 7 Sep 2020 20:41:42 +0100 Subject: [PATCH 51/71] DOC: move release note for #36155 (#36187) --- doc/source/whatsnew/v1.1.2.rst | 1 + doc/source/whatsnew/v1.2.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index da261907565a1..e9cba3de56920 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -50,6 +50,7 @@ Bug fixes Other ~~~~~ - :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`) +- :meth:`DataFrame.plot` and meth:`Series.plot` raise ``UserWarning`` about usage of FixedFormatter and FixedLocator (:issue:`35684` and :issue:`35945`) .. 
--------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9a778acba4764..ccaae9f996425 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -300,7 +300,6 @@ Plotting ^^^^^^^^ - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) -- meth:`DataFrame.plot` and meth:`Series.plot` raise ``UserWarning`` about usage of FixedFormatter and FixedLocator (:issue:`35684` and :issue:`35945`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ From de2a1dcdb41b564ddef302bffa005fff72340f45 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Sep 2020 12:56:18 -0700 Subject: [PATCH 52/71] REF: use _validate_foo pattern in Categorical (#36181) --- pandas/core/arrays/categorical.py | 31 ++++++++++++++++++++++--------- pandas/core/indexes/category.py | 11 +++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 02305479bef67..228e630f95863 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1192,6 +1192,26 @@ def map(self, mapper): __le__ = _cat_compare_op(operator.le) __ge__ = _cat_compare_op(operator.ge) + def _validate_insert_value(self, value) -> int: + code = self.categories.get_indexer([value]) + if (code == -1) and not (is_scalar(value) and isna(value)): + raise TypeError( + "cannot insert an item into a CategoricalIndex " + "that is not already an existing category" + ) + return code[0] + + def _validate_searchsorted_value(self, value): + # searchsorted is very performance sensitive. By converting codes + # to same dtype as self.codes, we get much faster performance. + if is_scalar(value): + codes = self.categories.get_loc(value) + codes = self.codes.dtype.type(codes) + else: + locs = [self.categories.get_loc(x) for x in value] + codes = np.array(locs, dtype=self.codes.dtype) + return codes + def _validate_fill_value(self, fill_value): """ Convert a user-facing fill_value to a representation to use with our @@ -1299,15 +1319,8 @@ def memory_usage(self, deep=False): @doc(_shared_docs["searchsorted"], klass="Categorical") def searchsorted(self, value, side="left", sorter=None): - # searchsorted is very performance sensitive. By converting codes - # to same dtype as self.codes, we get much faster performance. 
- if is_scalar(value): - codes = self.categories.get_loc(value) - codes = self.codes.dtype.type(codes) - else: - locs = [self.categories.get_loc(x) for x in value] - codes = np.array(locs, dtype=self.codes.dtype) - return self.codes.searchsorted(codes, side=side, sorter=sorter) + value = self._validate_searchsorted_value(value) + return self.codes.searchsorted(value, side=side, sorter=sorter) def isna(self): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index cbb30763797d1..d38f77aaceb01 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -20,7 +20,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna from pandas.core import accessor from pandas.core.algorithms import take_1d @@ -734,15 +734,10 @@ def insert(self, loc: int, item): ValueError if the item is not in the categories """ - code = self.categories.get_indexer([item]) - if (code == -1) and not (is_scalar(item) and isna(item)): - raise TypeError( - "cannot insert an item into a CategoricalIndex " - "that is not already an existing category" - ) + code = self._data._validate_insert_value(item) codes = self.codes - codes = np.concatenate((codes[:loc], code, codes[loc:])) + codes = np.concatenate((codes[:loc], [code], codes[loc:])) return self._create_from_codes(codes) def _concat(self, to_concat, name): From d9de663cae80dbf718b08ab852f3056da7b64559 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Sep 2020 13:46:33 -0700 Subject: [PATCH 53/71] DTA/TDA/PA use self._data instead of self.asi8 for self._ndarray (#36171) --- pandas/core/arrays/datetimelike.py | 50 +++++++++++--------- pandas/core/arrays/datetimes.py | 4 ++ pandas/core/arrays/period.py | 4 ++ pandas/core/arrays/timedeltas.py | 4 ++ pandas/tests/frame/indexing/test_datetime.py | 4 +- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a5b8032974fa4..a218745db0a44 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -27,7 +27,7 @@ from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( @@ -175,6 +175,14 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: """ raise AbstractMethodError(self) + @classmethod + def _rebox_native(cls, value: int) -> Union[int, np.datetime64, np.timedelta64]: + """ + Box an integer unboxed via _unbox_scalar into the native type for + the underlying ndarray. + """ + raise AbstractMethodError(cls) + def _unbox_scalar(self, value: DTScalarOrNaT) -> int: """ Unbox the integer value of a scalar `value`. 
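+
+        ``_rebox_native`` performs the inverse conversion, boxing the
+        unboxed integer back into the underlying ndarray's native scalar
+        type (e.g. ``np.datetime64`` for DatetimeArray).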
@@ -458,18 +466,15 @@ class DatetimeLikeArrayMixin( # ------------------------------------------------------------------ # NDArrayBackedExtensionArray compat - # TODO: make this a cache_readonly; need to get around _index_data - # kludge in libreduction - @property + @cache_readonly def _ndarray(self) -> np.ndarray: - # NB: A bunch of Interval tests fail if we use ._data - return self.asi8 + return self._data def _from_backing_data(self: _T, arr: np.ndarray) -> _T: # Note: we do not retain `freq` - # error: Too many arguments for "NDArrayBackedExtensionArray" - # error: Unexpected keyword argument "dtype" for "NDArrayBackedExtensionArray" - return type(self)(arr, dtype=self.dtype) # type: ignore[call-arg] + return type(self)._simple_new( # type: ignore[attr-defined] + arr, dtype=self.dtype + ) # ------------------------------------------------------------------ @@ -526,7 +531,7 @@ def __array__(self, dtype=None) -> np.ndarray: # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): return np.array(list(self), dtype=object) - return self._data + return self._ndarray def __getitem__(self, key): """ @@ -536,7 +541,7 @@ def __getitem__(self, key): if lib.is_integer(key): # fast-path - result = self._data[key] + result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) return self._simple_new(result, dtype=self.dtype) @@ -557,7 +562,7 @@ def __getitem__(self, key): key = check_array_indexer(self, key) freq = self._get_getitem_freq(key) - result = self._data[key] + result = self._ndarray[key] if lib.is_scalar(result): return self._box_func(result) return self._simple_new(result, dtype=self.dtype, freq=freq) @@ -612,7 +617,7 @@ def __setitem__( value = self._validate_setitem_value(value) key = check_array_indexer(self, key) - self._data[key] = value + self._ndarray[key] = value self._maybe_clear_freq() def _maybe_clear_freq(self): @@ -663,8 +668,8 @@ def astype(self, dtype, copy=True): def view(self, dtype=None): if dtype is None or dtype is self.dtype: - return type(self)(self._data, dtype=self.dtype) - return self._data.view(dtype=dtype) + return type(self)(self._ndarray, dtype=self.dtype) + return self._ndarray.view(dtype=dtype) # ------------------------------------------------------------------ # ExtensionArray Interface @@ -705,7 +710,7 @@ def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) def _values_for_argsort(self): - return self._data + return self._ndarray # ------------------------------------------------------------------ # Validation Methods @@ -722,7 +727,7 @@ def _validate_fill_value(self, fill_value): Returns ------- - fill_value : np.int64 + fill_value : np.int64, np.datetime64, or np.timedelta64 Raises ------ @@ -736,7 +741,8 @@ def _validate_fill_value(self, fill_value): fill_value = self._validate_scalar(fill_value, msg) except TypeError as err: raise ValueError(msg) from err - return self._unbox(fill_value) + rv = self._unbox(fill_value) + return self._rebox_native(rv) def _validate_shift_value(self, fill_value): # TODO(2.0): once this deprecation is enforced, use _validate_fill_value @@ -951,9 +957,9 @@ def value_counts(self, dropna=False): from pandas import Index, Series if dropna: - values = self[~self.isna()]._data + values = self[~self.isna()]._ndarray else: - values = self._data + values = self._ndarray cls = type(self) @@ -1044,9 +1050,9 @@ def fillna(self, value=None, method=None, limit=None): else: func = missing.backfill_1d - values = self._data + values = self._ndarray if not 
is_period_dtype(self.dtype):
-            # For PeriodArray self._data is i8, which gets copied
+            # For PeriodArray self._ndarray is i8, which gets copied
             # by `func`. Otherwise we need to make a copy manually
             # to avoid modifying `self` in-place.
             values = values.copy()
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 1bea3a9eb137e..d913e7be9ae5f 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -446,6 +446,10 @@ def _generate_range(
     # -----------------------------------------------------------------
     # DatetimeLike Interface

+    @classmethod
+    def _rebox_native(cls, value: int) -> np.datetime64:
+        return np.int64(value).view("M8[ns]")
+
     def _unbox_scalar(self, value):
         if not isinstance(value, self._scalar_type) and value is not NaT:
             raise ValueError("'value' should be a Timestamp.")
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index cc39ffb5d1203..c3a9430736969 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -253,6 +253,10 @@ def _generate_range(cls, start, end, periods, freq, fields):
     # -----------------------------------------------------------------
     # DatetimeLike Interface

+    @classmethod
+    def _rebox_native(cls, value: int) -> np.int64:
+        return np.int64(value)
+
     def _unbox_scalar(self, value: Union[Period, NaTType]) -> int:
         if value is NaT:
             return value.value
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 2d694c469b3a9..485ebb49a376d 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -271,6 +271,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
     # ----------------------------------------------------------------
     # DatetimeLike Interface

+    @classmethod
+    def _rebox_native(cls, value: int) -> np.timedelta64:
+        return np.int64(value).view("m8[ns]")
+
     def _unbox_scalar(self, value):
         if not isinstance(value, self._scalar_type) and value is not NaT:
             raise ValueError("'value' should be a Timedelta.")
diff --git a/pandas/tests/frame/indexing/test_datetime.py b/pandas/tests/frame/indexing/test_datetime.py
index 1937a4c380dc9..1866ac341def6 100644
--- a/pandas/tests/frame/indexing/test_datetime.py
+++ b/pandas/tests/frame/indexing/test_datetime.py
@@ -23,7 +23,9 @@ def test_setitem(self, timezone_frame):
         b1 = df._mgr.blocks[1]
         b2 = df._mgr.blocks[2]
         tm.assert_extension_array_equal(b1.values, b2.values)
-        assert id(b1.values._data.base) != id(b2.values._data.base)
+        b1base = b1.values._data.base
+        b2base = b2.values._data.base
+        assert b1base is None or (id(b1base) != id(b2base))

         # with nan
         df2 = df.copy()

From 7cb14217612c8b253e02a86991835b1af30a43f2 Mon Sep 17 00:00:00 2001
From: Thomas Dickson
Date: Mon, 7 Sep 2020 21:47:39 +0100
Subject: [PATCH 54/71] TST: verify groupby doesn't alter uint64s to floats #30859 (#36164)

---
 pandas/tests/groupby/test_groupby.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e0196df7ceac0..69397228dd941 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1183,6 +1183,18 @@ def test_groupby_dtype_inference_empty():
     tm.assert_frame_equal(result, expected, by_blocks=True)


+def test_groupby_uint64_float_conversion():
+    # GH 30859: groupby sometimes converted uint64 values to floats
+    df = pd.DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]})
+    result = df.groupby(["first", "second"])["value"].max()
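+    # 16148277970000000000 exceeds both the int64 range and float64's
+    # 2**53 integer precision, so a lossy intermediate cast would change it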
+ expected = pd.Series( + [16148277970000000000], + pd.MultiIndex.from_product([[1], [1]], names=["first", "second"]), + name="value", + ) + tm.assert_series_equal(result, expected) + + def test_groupby_list_infer_array_like(df): result = df.groupby(list(df["A"])).mean() expected = df.groupby(df["A"]).mean() From f79614082e03da46e842c42bd817059a2cb99359 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Mon, 7 Sep 2020 23:06:29 +0200 Subject: [PATCH 55/71] Fix compressed multiindex for output of groupby.rolling (#36152) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/window/rolling.py | 10 +++++----- pandas/tests/window/test_grouper.py | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index e9cba3de56920..28ce49c11b3f0 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`) - Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`) - Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`) +- Fixed regression in :meth:`Series.groupby.rolling` number of levels of :class:`MultiIndex` in input was compressed to one (:issue:`36018`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4c4ec4d700b7f..235bd5364af02 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2211,17 +2211,17 @@ def _apply( # Compose MultiIndex result from grouping levels then rolling level # Aggregate the MultiIndex data as tuples then the level names grouped_object_index = self.obj.index - grouped_index_name = [grouped_object_index.name] + grouped_index_name = [*grouped_object_index.names] groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings] result_index_names = groupby_keys + grouped_index_name result_index_data = [] for key, values in self._groupby.grouper.indices.items(): for value in values: - if not is_list_like(key): - data = [key, grouped_object_index[value]] - else: - data = [*key, grouped_object_index[value]] + data = [ + *com.maybe_make_list(key), + *com.maybe_make_list(grouped_object_index[value]), + ] result_index_data.append(tuple(data)) result_index = MultiIndex.from_tuples( diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 170bf100b3891..cb85ad7584da7 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -372,3 +372,24 @@ def test_groupby_subset_rolling_subset_with_closed(self): name="column1", ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["max", "min"]) + def test_groupby_rolling_index_changed(self, func): + # GH: #36018 nlevels of MultiIndex changed + ds = Series( + [1, 2, 2], + index=pd.MultiIndex.from_tuples( + [("a", "x"), ("a", "y"), ("c", "z")], names=["1", "2"] + ), + name="a", + ) + + result = getattr(ds.groupby(ds).rolling(2), func)() + expected = Series( + [np.nan, np.nan, 2.0], + index=pd.MultiIndex.from_tuples( + [(1, "a", "x"), (2, "a", "y"), (2, "c", "z")], names=["a", "1", "2"] + ), + 
name="a", + ) + tm.assert_series_equal(result, expected) From c962e70175b73c956c5e1fee43b0d40311d08329 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Mon, 7 Sep 2020 23:11:29 +0200 Subject: [PATCH 56/71] TST: DataFrame.replace: TypeError: Cannot compare types 'ndarray(dtype=int64)' and 'unicode' (#36202) --- pandas/tests/frame/methods/test_replace.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index ea2488dfc0877..a77753ed9f9d0 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1599,3 +1599,11 @@ def test_replace_intervals(self): result = df.replace({"a": {pd.Interval(0, 1): "x"}}) expected = pd.DataFrame({"a": ["x", "x"]}) tm.assert_frame_equal(result, expected) + + def test_replace_unicode(self): + # GH: 16784 + columns_values_map = {"positive": {"正面": 1, "中立": 1, "负面": 0}} + df1 = pd.DataFrame({"positive": np.ones(3)}) + result = df1.replace(columns_values_map) + expected = pd.DataFrame({"positive": np.ones(3)}) + tm.assert_frame_equal(result, expected) From 22b547b64fc42fb5b41b854f322d1dd42aefeea2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Sep 2020 14:43:12 -0700 Subject: [PATCH 57/71] REF: collect methods by topic (#36173) --- pandas/core/arrays/categorical.py | 148 +++++++++++++++++------------- pandas/core/indexes/category.py | 29 +++--- pandas/core/indexes/datetimes.py | 3 + pandas/core/indexes/interval.py | 64 +++++++------ pandas/core/indexes/multi.py | 2 + pandas/core/indexes/numeric.py | 42 +++++---- pandas/core/indexes/period.py | 67 +++++++------- pandas/core/indexes/range.py | 5 + pandas/core/indexes/timedeltas.py | 5 + 9 files changed, 212 insertions(+), 153 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 228e630f95863..58847528d2183 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -393,56 +393,6 @@ def __init__( self._dtype = self._dtype.update_dtype(dtype) self._codes = coerce_indexer_dtype(codes, dtype.categories) - @property - def categories(self): - """ - The categories of this categorical. - - Setting assigns new values to each category (effectively a rename of - each individual category). - - The assigned value has to be a list-like object. All items must be - unique and the number of items in the new categories must be the same - as the number of items in the old categories. - - Assigning to `categories` is a inplace operation! - - Raises - ------ - ValueError - If the new categories do not validate as categories or if the - number of new categories is unequal the number of old categories - - See Also - -------- - rename_categories : Rename categories. - reorder_categories : Reorder categories. - add_categories : Add new categories. - remove_categories : Remove the specified categories. - remove_unused_categories : Remove categories which are not used. - set_categories : Set the categories to the specified ones. - """ - return self.dtype.categories - - @categories.setter - def categories(self, categories): - new_dtype = CategoricalDtype(categories, ordered=self.ordered) - if self.dtype.categories is not None and len(self.dtype.categories) != len( - new_dtype.categories - ): - raise ValueError( - "new categories need to have the same number of " - "items as the old categories!" 
- ) - self._dtype = new_dtype - - @property - def ordered(self) -> Ordered: - """ - Whether the categories have an ordered relationship. - """ - return self.dtype.ordered - @property def dtype(self) -> CategoricalDtype: """ @@ -458,10 +408,6 @@ def _constructor(self) -> Type["Categorical"]: def _from_sequence(cls, scalars, dtype=None, copy=False): return Categorical(scalars, dtype=dtype) - def _formatter(self, boxed=False): - # Defer to CategoricalFormatter's formatter. - return None - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -640,6 +586,59 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): return cls(codes, dtype=dtype, fastpath=True) + # ------------------------------------------------------------------ + # Categories/Codes/Ordered + + @property + def categories(self): + """ + The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See Also + -------- + rename_categories : Rename categories. + reorder_categories : Reorder categories. + add_categories : Add new categories. + remove_categories : Remove the specified categories. + remove_unused_categories : Remove categories which are not used. + set_categories : Set the categories to the specified ones. + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if self.dtype.categories is not None and len(self.dtype.categories) != len( + new_dtype.categories + ): + raise ValueError( + "new categories need to have the same number of " + "items as the old categories!" + ) + self._dtype = new_dtype + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self.dtype.ordered + @property def codes(self) -> np.ndarray: """ @@ -1104,6 +1103,8 @@ def remove_unused_categories(self, inplace=False): if not inplace: return cat + # ------------------------------------------------------------------ + def map(self, mapper): """ Map categories using input correspondence (dict, Series, or function). @@ -1192,6 +1193,9 @@ def map(self, mapper): __le__ = _cat_compare_op(operator.le) __ge__ = _cat_compare_op(operator.ge) + # ------------------------------------------------------------- + # Validators; ideally these can be de-duplicated + def _validate_insert_value(self, value) -> int: code = self.categories.get_indexer([value]) if (code == -1) and not (is_scalar(value) and isna(value)): @@ -1241,6 +1245,8 @@ def _validate_fill_value(self, fill_value): ) return fill_value + # ------------------------------------------------------------- + def __array__(self, dtype=None) -> np.ndarray: """ The numpy array interface. @@ -1758,6 +1764,10 @@ def __contains__(self, key) -> bool: # ------------------------------------------------------------------ # Rendering Methods + def _formatter(self, boxed=False): + # Defer to CategoricalFormatter's formatter. 
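+        # Returning None makes the caller fall back to its default
+        # element formatting.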
+ return None + def _tidy_repr(self, max_vals=10, footer=True) -> str: """ a short repr displaying only max_vals and an optional (but default @@ -1987,7 +1997,9 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: result = dict(zip(categories, _result)) return result - # reduction ops # + # ------------------------------------------------------------------ + # Reductions + def _reduce(self, name: str, skipna: bool = True, **kwargs): func = getattr(self, name, None) if func is None: @@ -2090,6 +2102,9 @@ def mode(self, dropna=True): codes = sorted(htable.mode_int64(ensure_int64(codes), dropna)) return self._constructor(values=codes, dtype=self.dtype, fastpath=True) + # ------------------------------------------------------------------ + # ExtensionArray Interface + def unique(self): """ Return the ``Categorical`` which ``categories`` and ``codes`` are @@ -2179,6 +2194,18 @@ def equals(self, other: object) -> bool: return np.array_equal(self._codes, other_codes) return False + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.core.dtypes.concat import union_categoricals + + return union_categoricals(to_concat) + + # ------------------------------------------------------------------ + def is_dtype_equal(self, other): """ Returns True if categoricals are the same dtype @@ -2217,17 +2244,6 @@ def describe(self): return result - # Implement the ExtensionArray interface - @property - def _can_hold_na(self): - return True - - @classmethod - def _concat_same_type(self, to_concat): - from pandas.core.dtypes.concat import union_categoricals - - return union_categoricals(to_concat) - def isin(self, values) -> np.ndarray: """ Check whether `values` are contained in Categorical. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d38f77aaceb01..7509cb35069e8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -433,11 +433,6 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.astype("object") - def _maybe_cast_indexer(self, key): - code = self.categories.get_loc(key) - code = self.codes.dtype.type(code) - return code - @doc(Index.where) def where(self, cond, other=None): # TODO: Investigate an alternative implementation with @@ -537,6 +532,14 @@ def _reindex_non_unique(self, target): return new_target, indexer, new_indexer + # -------------------------------------------------------------------- + # Indexing Methods + + def _maybe_cast_indexer(self, key): + code = self.categories.get_loc(key) + code = self.codes.dtype.type(code) + return code + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) @@ -619,6 +622,15 @@ def _convert_arr_indexer(self, keyarr): def _convert_index_indexer(self, keyarr): return self._shallow_copy(keyarr) + @doc(Index._maybe_cast_slice_bound) + def _maybe_cast_slice_bound(self, label, side, kind): + if kind == "loc": + return label + + return super()._maybe_cast_slice_bound(label, side, kind) + + # -------------------------------------------------------------------- + def take_nd(self, *args, **kwargs): """Alias for `take`""" warnings.warn( @@ -628,13 +640,6 @@ def take_nd(self, *args, **kwargs): ) return self.take(*args, **kwargs) - @doc(Index._maybe_cast_slice_bound) - def _maybe_cast_slice_bound(self, label, side, kind): - if kind == "loc": - return label 
- - return super()._maybe_cast_slice_bound(label, side, kind) - def map(self, mapper): """ Map values using input correspondence (a dict, Series, or function). diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3fd93a8159041..f0b80c2852bd5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -509,6 +509,9 @@ def snap(self, freq="S"): dta = DatetimeArray(snapped, dtype=self.dtype) return DatetimeIndex._simple_new(dta, name=self.name) + # -------------------------------------------------------------------- + # Indexing Methods + def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): """ Calculate datetime bounds for parsed time string and its resolution. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 419ff81a2a478..3f72577c9420e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -57,7 +57,7 @@ from pandas.core.ops import get_op_result_name if TYPE_CHECKING: - from pandas import CategoricalIndex + from pandas import CategoricalIndex # noqa:F401 _VALID_CLOSED = {"left", "right", "both", "neither"} _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -515,28 +515,6 @@ def is_overlapping(self) -> bool: # GH 23309 return self._engine.is_overlapping - def _should_fallback_to_positional(self) -> bool: - # integer lookups in Series.__getitem__ are unambiguously - # positional in this case - return self.dtype.subtype.kind in ["m", "M"] - - def _maybe_cast_slice_bound(self, label, side, kind): - return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) - - @Appender(Index._convert_list_indexer.__doc__) - def _convert_list_indexer(self, keyarr): - """ - we are passed a list-like indexer. Return the - indexer for matching intervals. - """ - locs = self.get_indexer_for(keyarr) - - # we have missing values - if (locs == -1).any(): - raise KeyError - - return locs - def _can_reindex(self, indexer: np.ndarray) -> None: """ Check if we are allowing reindexing with this particular indexer. @@ -668,6 +646,9 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False): return sub_idx._searchsorted_monotonic(label, side) + # -------------------------------------------------------------------- + # Indexing Methods + def get_loc( self, key, method: Optional[str] = None, tolerance=None ) -> Union[int, slice, np.ndarray]: @@ -885,6 +866,30 @@ def _convert_slice_indexer(self, key: slice, kind: str): return super()._convert_slice_indexer(key, kind) + def _should_fallback_to_positional(self) -> bool: + # integer lookups in Series.__getitem__ are unambiguously + # positional in this case + return self.dtype.subtype.kind in ["m", "M"] + + def _maybe_cast_slice_bound(self, label, side, kind): + return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) + + @Appender(Index._convert_list_indexer.__doc__) + def _convert_list_indexer(self, keyarr): + """ + we are passed a list-like indexer. Return the + indexer for matching intervals. 
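+
+        Raises a bare ``KeyError`` if any entry has no matching interval.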
+ """ + locs = self.get_indexer_for(keyarr) + + # we have missing values + if (locs == -1).any(): + raise KeyError + + return locs + + # -------------------------------------------------------------------- + @Appender(Index.where.__doc__) def where(self, cond, other=None): if other is None: @@ -1030,6 +1035,9 @@ def equals(self, other: object) -> bool: and self.closed == other.closed ) + # -------------------------------------------------------------------- + # Set Operations + @Appender(Index.intersection.__doc__) @SetopCheck(op_name="intersection") def intersection( @@ -1115,6 +1123,12 @@ def func(self, other, sort=sort): return func + union = _setop("union") + difference = _setop("difference") + symmetric_difference = _setop("symmetric_difference") + + # -------------------------------------------------------------------- + @property def is_all_dates(self) -> bool: """ @@ -1123,10 +1137,6 @@ def is_all_dates(self) -> bool: """ return False - union = _setop("union") - difference = _setop("difference") - symmetric_difference = _setop("symmetric_difference") - # TODO: arithmetic operations # GH#30817 until IntervalArray implements inequalities, get them from Index diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e49a23935efbd..9630e154ccd17 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3154,6 +3154,8 @@ def _update_indexer(idxr, indexer=indexer): return indexer._values + # -------------------------------------------------------------------- + def _reorder_indexer( self, seq: Tuple[Union[Scalar, Iterable, AnyArrayLike], ...], diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index cd3f1f51a86d2..079f43cb2c66b 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -97,6 +97,9 @@ def _validate_dtype(cls, dtype: Dtype) -> None: f"Incorrect `dtype` passed: expected {expected}, received {dtype}" ) + # ---------------------------------------------------------------- + # Indexing Methods + @doc(Index._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side, kind): assert kind in ["loc", "getitem", None] @@ -104,6 +107,8 @@ def _maybe_cast_slice_bound(self, label, side, kind): # we will try to coerce to integers return self._maybe_cast_indexer(label) + # ---------------------------------------------------------------- + @doc(Index._shallow_copy) def _shallow_copy(self, values=None, name: Label = lib.no_default): if values is not None and not self._can_hold_na and values.dtype.kind == "f": @@ -293,6 +298,9 @@ class UInt64Index(IntegerIndex): _engine_type = libindex.UInt64Engine _default_dtype = np.dtype(np.uint64) + # ---------------------------------------------------------------- + # Indexing Methods + @doc(Index._convert_arr_indexer) def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so that the values returned @@ -314,6 +322,8 @@ def _convert_index_indexer(self, keyarr): return keyarr.astype(np.uint64) return keyarr + # ---------------------------------------------------------------- + def _wrap_joined_index(self, joined, other): name = get_op_result_name(self, other) return UInt64Index(joined, name=name) @@ -385,6 +395,22 @@ def _convert_slice_indexer(self, key: slice, kind: str): # translate to locations return self.slice_indexer(key.start, key.stop, key.step, kind=kind) + @doc(Index.get_loc) + def get_loc(self, key, method=None, tolerance=None): + if is_bool(key): + # Catch this to avoid accidentally casting to 1.0 + raise 
KeyError(key) + + if is_float(key) and np.isnan(key): + nan_idxs = self._nan_idxs + if not len(nan_idxs): + raise KeyError(key) + elif len(nan_idxs) == 1: + return nan_idxs[0] + return nan_idxs + + return super().get_loc(key, method=method, tolerance=tolerance) + # ---------------------------------------------------------------- def _format_native_types( @@ -409,22 +435,6 @@ def __contains__(self, other: Any) -> bool: return is_float(other) and np.isnan(other) and self.hasnans - @doc(Index.get_loc) - def get_loc(self, key, method=None, tolerance=None): - if is_bool(key): - # Catch this to avoid accidentally casting to 1.0 - raise KeyError(key) - - if is_float(key) and np.isnan(key): - nan_idxs = self._nan_idxs - if not len(nan_idxs): - raise KeyError(key) - elif len(nan_idxs) == 1: - return nan_idxs[0] - return nan_idxs - - return super().get_loc(key, method=method, tolerance=tolerance) - @cache_readonly def is_unique(self) -> bool: return super().is_unique and self._nan_idxs.size < 2 diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cdb502199c6f1..5282b6f0154b4 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -433,6 +433,41 @@ def inferred_type(self) -> str: # indexing return "period" + def insert(self, loc, item): + if not isinstance(item, Period) or self.freq != item.freq: + return self.astype(object).insert(loc, item) + + i8result = np.concatenate( + (self[:loc].asi8, np.array([item.ordinal]), self[loc:].asi8) + ) + arr = type(self._data)._simple_new(i8result, dtype=self.dtype) + return type(self)._simple_new(arr, name=self.name) + + def join(self, other, how="left", level=None, return_indexers=False, sort=False): + """ + See Index.join + """ + self._assert_can_do_setop(other) + + if not isinstance(other, PeriodIndex): + return self.astype(object).join( + other, how=how, level=level, return_indexers=return_indexers, sort=sort + ) + + # _assert_can_do_setop ensures we have matching dtype + result = Int64Index.join( + self, + other, + how=how, + level=level, + return_indexers=return_indexers, + sort=sort, + ) + return result + + # ------------------------------------------------------------------------ + # Indexing Methods + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = ensure_index(target) @@ -607,38 +642,6 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True except KeyError as err: raise KeyError(key) from err - def insert(self, loc, item): - if not isinstance(item, Period) or self.freq != item.freq: - return self.astype(object).insert(loc, item) - - i8result = np.concatenate( - (self[:loc].asi8, np.array([item.ordinal]), self[loc:].asi8) - ) - arr = type(self._data)._simple_new(i8result, dtype=self.dtype) - return type(self)._simple_new(arr, name=self.name) - - def join(self, other, how="left", level=None, return_indexers=False, sort=False): - """ - See Index.join - """ - self._assert_can_do_setop(other) - - if not isinstance(other, PeriodIndex): - return self.astype(object).join( - other, how=how, level=level, return_indexers=return_indexers, sort=sort - ) - - # _assert_can_do_setop ensures we have matching dtype - result = Int64Index.join( - self, - other, - how=how, - level=level, - return_indexers=return_indexers, - sort=sort, - ) - return result - # ------------------------------------------------------------------------ # Set Operation Methods diff --git a/pandas/core/indexes/range.py 
b/pandas/core/indexes/range.py index f1457a9aac62b..684691501de5c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -338,6 +338,9 @@ def __contains__(self, key: Any) -> bool: return False return key in self._range + # -------------------------------------------------------------------- + # Indexing Methods + @doc(Int64Index.get_loc) def get_loc(self, key, method=None, tolerance=None): if method is None and tolerance is None: @@ -379,6 +382,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): locs[valid] = len(self) - 1 - locs[valid] return ensure_platform_int(locs) + # -------------------------------------------------------------------- + def tolist(self): return list(self._range) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 85c8396dfd1fe..df08fda78823d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -202,6 +202,9 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ return is_timedelta64_dtype(dtype) + # ------------------------------------------------------------------- + # Indexing Methods + def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label @@ -248,6 +251,8 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): return label + # ------------------------------------------------------------------- + def is_type_compatible(self, typ) -> bool: return typ == self.inferred_type or typ == "timedelta" From a1559865dcdf2ec1cfbe1418544288558032d026 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Sep 2020 15:41:00 -0700 Subject: [PATCH 58/71] REF: implement Categorical._validate_setitem_value (#36180) --- pandas/core/arrays/categorical.py | 35 +++++++++++++++--------------- pandas/core/arrays/datetimelike.py | 16 +++++++++----- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 58847528d2183..b732db4c66003 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -9,7 +9,7 @@ from pandas._config import get_option -from pandas._libs import NaT, algos as libalgos, hashtable as htable +from pandas._libs import NaT, algos as libalgos, hashtable as htable, lib from pandas._typing import ArrayLike, Dtype, Ordered, Scalar from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc @@ -1868,14 +1868,6 @@ def __repr__(self) -> str: # ------------------------------------------------------------------ - def _maybe_coerce_indexer(self, indexer): - """ - return an indexer coerced to the codes dtype - """ - if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "i": - indexer = indexer.astype(self._codes.dtype) - return indexer - def __getitem__(self, key): """ Return an item. 
@@ -1905,6 +1897,11 @@ def __setitem__(self, key, value): If (one or more) Value is not in categories or if a assigned `Categorical` does not have the same categories """ + key = self._validate_setitem_key(key) + value = self._validate_setitem_value(value) + self._ndarray[key] = value + + def _validate_setitem_value(self, value): value = extract_array(value, extract_numpy=True) # require identical categories set @@ -1934,12 +1931,19 @@ def __setitem__(self, key, value): "category, set the categories first" ) - # set by position - if isinstance(key, (int, np.integer)): + lindexer = self.categories.get_indexer(rvalue) + if isinstance(lindexer, np.ndarray) and lindexer.dtype.kind == "i": + lindexer = lindexer.astype(self._ndarray.dtype) + + return lindexer + + def _validate_setitem_key(self, key): + if lib.is_integer(key): + # set by position pass - # tuple of indexers (dataframe) elif isinstance(key, tuple): + # tuple of indexers (dataframe) # only allow 1 dimensional slicing, but can # in a 2-d case be passed (slice(None),....) if len(key) == 2: @@ -1951,17 +1955,14 @@ def __setitem__(self, key, value): else: raise AssertionError("invalid slicing for a 1-ndim categorical") - # slicing in Series or Categorical elif isinstance(key, slice): + # slicing in Series or Categorical pass # else: array of True/False in Series or Categorical - lindexer = self.categories.get_indexer(rvalue) - lindexer = self._maybe_coerce_indexer(lindexer) - key = check_array_indexer(self, key) - self._codes[key] = lindexer + return key def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a218745db0a44..2626890c2dbe5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -546,6 +546,15 @@ def __getitem__(self, key): return self._box_func(result) return self._simple_new(result, dtype=self.dtype) + key = self._validate_getitem_key(key) + result = self._ndarray[key] + if lib.is_scalar(result): + return self._box_func(result) + + freq = self._get_getitem_freq(key) + return self._simple_new(result, dtype=self.dtype, freq=freq) + + def _validate_getitem_key(self, key): if com.is_bool_indexer(key): # first convert to boolean, because check_array_indexer doesn't # allow object dtype @@ -560,12 +569,7 @@ def __getitem__(self, key): pass else: key = check_array_indexer(self, key) - - freq = self._get_getitem_freq(key) - result = self._ndarray[key] - if lib.is_scalar(result): - return self._box_func(result) - return self._simple_new(result, dtype=self.dtype, freq=freq) + return key def _get_getitem_freq(self, key): """ From 5aa96ddc103eeb8fd8ce57e82a709f5ea766f674 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Sep 2020 17:15:51 -0700 Subject: [PATCH 59/71] COMPAT: match numpy behavior for searchsorted on dt64/td64 (#36176) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/arrays/datetimelike.py | 7 +++---- pandas/tests/arrays/test_datetimelike.py | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ccaae9f996425..2afa1f1a6199e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -228,6 +228,7 @@ Datetimelike - Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g months=12) (:issue:`34511`) - Bug in :meth:`DatetimeIndex.get_slice_bound` where ``datetime.date`` 
objects were not accepted or naive :class:`Timestamp` with a tz-aware :class:`DatetimeIndex` (:issue:`35690`) - Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`) +- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64`` or ``timedelta64`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2626890c2dbe5..6477b94a823ce 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -862,7 +862,8 @@ def _validate_searchsorted_value(self, value): # TODO: cast_str? we accept it for scalar value = self._validate_listlike(value, "searchsorted") - return self._unbox(value) + rv = self._unbox(value) + return self._rebox_native(rv) def _validate_setitem_value(self, value): msg = ( @@ -941,9 +942,7 @@ def searchsorted(self, value, side="left", sorter=None): Array of insertion points with the same shape as `value`. """ value = self._validate_searchsorted_value(value) - - # TODO: Use datetime64 semantics for sorting, xref GH#29844 - return self.asi8.searchsorted(value, side=side, sorter=sorter) + return self._data.searchsorted(value, side=side, sorter=sorter) def value_counts(self, dropna=False): """ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b1ab700427c28..292557fc04258 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -241,10 +241,15 @@ def test_searchsorted(self): expected = np.array([2, 3], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - # Following numpy convention, NaT goes at the beginning - # (unlike NaN which goes at the end) + # GH#29884 match numpy convention on whether NaT goes + # at the end or the beginning result = arr.searchsorted(pd.NaT) - assert result == 0 + if _np_version_under1p18 or self.array_cls is PeriodArray: + # Following numpy convention, NaT goes at the beginning + # (unlike NaN which goes at the end) + assert result == 0 + else: + assert result == 10 def test_getitem_2d(self, arr1d): # 2d slicing on a 1D array From 81c5802e6eb6cf4b441f5c386bd39996c5503425 Mon Sep 17 00:00:00 2001 From: Nidhi Zare Date: Tue, 8 Sep 2020 06:00:19 +0530 Subject: [PATCH 60/71] pandas docs json_normalize example (#36194) Co-authored-by: Nidhi Zare --- pandas/io/json/_normalize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 44765dbe74b46..2e1fc57e88ed1 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -176,7 +176,7 @@ def _json_normalize( ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] - >>> json_normalize(data, max_level=0) + >>> pandas.json_normalize(data, max_level=0) fitness id name 0 {'height': 130, 'weight': 60} 1.0 Cole Volk 1 {'height': 130, 'weight': 60} NaN Mose Reg @@ -191,7 +191,7 @@ def _json_normalize( ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] - >>> json_normalize(data, max_level=1) + >>> pandas.json_normalize(data, max_level=1) fitness.height fitness.weight id name 0 130 60 1.0 Cole Volk 1 130 60 NaN Mose Reg @@ -208,7 +208,7 @@ def _json_normalize( ... 
'info': {'governor': 'John Kasich'}, ... 'counties': [{'name': 'Summit', 'population': 1234}, ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> result = json_normalize(data, 'counties', ['state', 'shortname', + >>> result = pandas.json_normalize(data, 'counties', ['state', 'shortname', ... ['info', 'governor']]) >>> result name population state shortname info.governor @@ -219,7 +219,7 @@ def _json_normalize( 4 Cuyahoga 1337 Ohio OH John Kasich >>> data = {'A': [1, 2]} - >>> json_normalize(data, 'A', record_prefix='Prefix.') + >>> pandas.json_normalize(data, 'A', record_prefix='Prefix.') Prefix.0 0 1 1 2 From a56c6af86c6ddf46f872cfabc79904ece3543441 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 8 Sep 2020 02:51:20 -0700 Subject: [PATCH 61/71] BUG: GroupbyRolling with an empty frame (#36208) Co-authored-by: Matt Roeschke --- doc/source/whatsnew/v1.1.2.rst | 2 +- pandas/core/window/rolling.py | 10 ++++++---- pandas/tests/window/test_grouper.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 28ce49c11b3f0..f13d38d1f8f76 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -24,7 +24,7 @@ Fixed regressions - Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`) - Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`) - Fixed regression in :meth:`Series.groupby.rolling` number of levels of :class:`MultiIndex` in input was compressed to one (:issue:`36018`) -- +- Fixed regression in :class:`DataFrameGroupBy` on an empty :class:`DataFrame` (:issue:`36197`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 235bd5364af02..9466ada3f4578 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2240,10 +2240,12 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): """ # Ensure the object we're rolling over is monotonically sorted relative # to the groups - groupby_order = np.concatenate( - list(self._groupby.grouper.indices.values()) - ).astype(np.int64) - obj = obj.take(groupby_order) + # GH 36197 + if not obj.empty: + groupby_order = np.concatenate( + list(self._groupby.grouper.indices.values()) + ).astype(np.int64) + obj = obj.take(groupby_order) return super()._create_blocks(obj) def _get_cython_func_type(self, func: str) -> Callable: diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index cb85ad7584da7..786cf68d28871 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -393,3 +393,15 @@ def test_groupby_rolling_index_changed(self, func): name="a", ) tm.assert_series_equal(result, expected) + + def test_groupby_rolling_empty_frame(self): + # GH 36197 + expected = pd.DataFrame({"s1": []}) + result = expected.groupby("s1").rolling(window=1).sum() + expected.index = pd.MultiIndex.from_tuples([], names=["s1", None]) + tm.assert_frame_equal(result, expected) + + expected = pd.DataFrame({"s1": [], "s2": []}) + result = expected.groupby(["s1", "s2"]).rolling(window=1).sum() + expected.index = pd.MultiIndex.from_tuples([], names=["s1", "s2", None]) + tm.assert_frame_equal(result, expected) From 4a0152e731c3c34d2f5cde80952d1bee497df80f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 8 Sep 2020 11:22:20 +0100 Subject: [PATCH 62/71] DOC: doc fix (#36205) --- doc/source/whatsnew/v1.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index f13d38d1f8f76..0e4a88f3ee56b 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -51,7 +51,7 @@ Bug fixes Other ~~~~~ - :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`) -- :meth:`DataFrame.plot` and meth:`Series.plot` raise ``UserWarning`` about usage of FixedFormatter and FixedLocator (:issue:`35684` and :issue:`35945`) +- :meth:`DataFrame.plot` and :meth:`Series.plot` raise ``UserWarning`` about usage of ``FixedFormatter`` and ``FixedLocator`` (:issue:`35684` and :issue:`35945`) .. --------------------------------------------------------------------------- From 3aed293f71416a1543c962b3a72c6a31d5c36006 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 8 Sep 2020 12:50:13 +0100 Subject: [PATCH 63/71] DOC: release date for 1.1.2 (#36182) --- doc/source/whatsnew/v1.1.2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 0e4a88f3ee56b..a214ad9762733 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -1,7 +1,7 @@ .. _whatsnew_112: -What's new in 1.1.2 (??) ------------------------- +What's new in 1.1.2 (September 8, 2020) +--------------------------------------- These are the changes in pandas 1.1.2. See :ref:`release` for a full changelog including other versions of pandas. 
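
Note on PATCH 61 above: a minimal reproducer for the empty-frame regression, sketched from the new test_groupby_rolling_empty_frame test. Before the fix, the unconditional np.concatenate over grouper.indices.values() raised "ValueError: need at least one array to concatenate" whenever the grouped frame was empty (GH 36197):

    import pandas as pd

    df = pd.DataFrame({"s1": []})
    # Prior to the guard in _create_blocks this raised ValueError from
    # np.concatenate([]); with the fix it returns an empty result whose
    # MultiIndex is named ["s1", None], matching the new test's expectation.
    result = df.groupby("s1").rolling(window=1).sum()
    print(result)

The fix itself is deliberately narrow: the monotonic reordering of the rolled-over object is simply skipped when obj.empty, since an empty frame has no groups to reorder.
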
From 4c9add82fbe587599892c22b6718d467a1d4de9a Mon Sep 17 00:00:00 2001 From: Yanxian Lin Date: Tue, 8 Sep 2020 06:01:43 -0700 Subject: [PATCH 64/71] Fixed pandas.json_normalize doctests errors` (#36207) --- pandas/io/json/_normalize.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 2e1fc57e88ed1..3ed0b5851b395 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -163,11 +163,11 @@ def _json_normalize( >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, ... {'name': {'given': 'Mose', 'family': 'Regner'}}, ... {'id': 2, 'name': 'Faye Raker'}] - >>> pandas.json_normalize(data) - id name name.family name.first name.given name.last - 0 1.0 NaN NaN Coleen NaN Volk - 1 NaN NaN Regner NaN Mose NaN - 2 2.0 Faye Raker NaN NaN NaN NaN + >>> pd.json_normalize(data) + id name.first name.last name.given name.family name + 0 1.0 Coleen Volk NaN NaN NaN + 1 NaN NaN NaN Mose Regner NaN + 2 2.0 NaN NaN NaN NaN Faye Raker >>> data = [{'id': 1, ... 'name': "Cole Volk", @@ -176,11 +176,11 @@ def _json_normalize( ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] - >>> pandas.json_normalize(data, max_level=0) - fitness id name - 0 {'height': 130, 'weight': 60} 1.0 Cole Volk - 1 {'height': 130, 'weight': 60} NaN Mose Reg - 2 {'height': 130, 'weight': 60} 2.0 Faye Raker + >>> pd.json_normalize(data, max_level=0) + id name fitness + 0 1.0 Cole Volk {'height': 130, 'weight': 60} + 1 NaN Mose Reg {'height': 130, 'weight': 60} + 2 2.0 Faye Raker {'height': 130, 'weight': 60} Normalizes nested data up to level 1. @@ -191,11 +191,11 @@ def _json_normalize( ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] - >>> pandas.json_normalize(data, max_level=1) - fitness.height fitness.weight id name - 0 130 60 1.0 Cole Volk - 1 130 60 NaN Mose Reg - 2 130 60 2.0 Faye Raker + >>> pd.json_normalize(data, max_level=1) + id name fitness.height fitness.weight + 0 1.0 Cole Volk 130 60 + 1 NaN Mose Reg 130 60 + 2 2.0 Faye Raker 130 60 >>> data = [{'state': 'Florida', ... 'shortname': 'FL', @@ -208,7 +208,7 @@ def _json_normalize( ... 'info': {'governor': 'John Kasich'}, ... 'counties': [{'name': 'Summit', 'population': 1234}, ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> result = pandas.json_normalize(data, 'counties', ['state', 'shortname', + >>> result = pd.json_normalize(data, 'counties', ['state', 'shortname', ... 
['info', 'governor']]) >>> result name population state shortname info.governor @@ -219,7 +219,7 @@ def _json_normalize( 4 Cuyahoga 1337 Ohio OH John Kasich >>> data = {'A': [1, 2]} - >>> pandas.json_normalize(data, 'A', record_prefix='Prefix.') + >>> pd.json_normalize(data, 'A', record_prefix='Prefix.') Prefix.0 0 1 1 2 From 11643bc9072d25c49e57339959415771fc7b78fa Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 8 Sep 2020 09:45:08 -0400 Subject: [PATCH 65/71] BUG: copying series into empty dataframe does not preserve dataframe index name (#36141) --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/frame.py | 8 +++++--- pandas/tests/indexing/test_partial.py | 12 ++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index a214ad9762733..c6a08f4fb852a 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -43,6 +43,7 @@ Bug fixes - Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`) - Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`) - Bug in :meth:`import_optional_dependency` returning incorrect package names in cases where package name is different from import name (:issue:`35948`) +- Bug when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`31368`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e1a889bf79d95..59cf4c0e2f81d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3206,9 +3206,11 @@ def _ensure_valid_index(self, value): "and a value that cannot be converted to a Series" ) from err - self._mgr = self._mgr.reindex_axis( - value.index.copy(), axis=1, fill_value=np.nan - ) + # GH31368 preserve name of index + index_copy = value.index.copy() + index_copy.name = self.index.name + + self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) def _box_col_values(self, values, loc: int) -> Series: """ diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 350f86b4e9fd0..7afbbc2b9ab2b 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -660,3 +660,15 @@ def test_indexing_timeseries_regression(self): expected = Series(rng, index=rng) tm.assert_series_equal(result, expected) + + def test_index_name_empty(self): + # GH 31368 + df = pd.DataFrame({}, index=pd.RangeIndex(0, name="df_index")) + series = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index")) + + df["series"] = series + expected = pd.DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index") + ) + + tm.assert_frame_equal(df, expected) From edd802f99cbbe9840e7b70d556dd28b2331c326c Mon Sep 17 00:00:00 2001 From: tiagohonorato <61059243+tiagohonorato@users.noreply.github.com> Date: Tue, 8 Sep 2020 12:28:21 -0300 Subject: [PATCH 66/71] CLN remove trailing commas (#36222) --- pandas/tests/io/pytables/test_timezones.py | 4 ++-- pandas/tests/io/test_feather.py | 2 +- pandas/tests/io/test_s3.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 38d32b0bdc8a3..1c29928991cde 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ 
b/pandas/tests/io/pytables/test_timezones.py @@ -110,7 +110,7 @@ def test_append_with_timezones_dateutil(setup_path): dti = dti._with_freq(None) # freq doesnt round-trip # GH 4098 example - df = DataFrame(dict(A=Series(range(3), index=dti,))) + df = DataFrame(dict(A=Series(range(3), index=dti))) _maybe_remove(store, "df") store.put("df", df) @@ -197,7 +197,7 @@ def test_append_with_timezones_pytz(setup_path): dti = dti._with_freq(None) # freq doesnt round-trip # GH 4098 example - df = DataFrame(dict(A=Series(range(3), index=dti,))) + df = DataFrame(dict(A=Series(range(3), index=dti))) _maybe_remove(store, "df") store.put("df", df) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index a8a5c8f00e6bf..c1e63f512b53e 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -76,7 +76,7 @@ def test_basic(self): pd.Timestamp("20130103"), ], "dtns": pd.DatetimeIndex( - list(pd.date_range("20130101", periods=3, freq="ns")), freq=None, + list(pd.date_range("20130101", periods=3, freq="ns")), freq=None ), } ) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py index a137e76b1696b..0ee6cb0796644 100644 --- a/pandas/tests/io/test_s3.py +++ b/pandas/tests/io/test_s3.py @@ -43,6 +43,6 @@ def test_read_with_creds_from_pub_bucket(): os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") df = read_csv( - "s3://gdelt-open-data/events/1981.csv", nrows=5, sep="\t", header=None, + "s3://gdelt-open-data/events/1981.csv", nrows=5, sep="\t", header=None ) assert len(df) == 5 From 9339b8059b0777706df253a2001922e7903bcc95 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Sep 2020 08:29:03 -0700 Subject: [PATCH 67/71] CLN: remove unused return value in _create_blocks (#36196) --- pandas/core/window/rolling.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9466ada3f4578..5a7482076903c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -234,7 +234,7 @@ def _validate_get_window_bounds_signature(window: BaseIndexer) -> None: f"get_window_bounds" ) - def _create_blocks(self, obj: FrameOrSeriesUnion): + def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: """ Split data into blocks & return conformed data. 
""" @@ -242,9 +242,8 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): if self.on is not None and not isinstance(self.on, Index): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - blocks = obj._to_dict_of_blocks(copy=False).values() - return blocks, obj + return obj def _gotitem(self, key, ndim, subset=None): """ @@ -333,7 +332,7 @@ def __repr__(self) -> str: def __iter__(self): window = self._get_window(win_type=None) - _, obj = self._create_blocks(self._selected_obj) + obj = self._create_data(self._selected_obj) index = self._get_window_indexer(window=window) start, end = index.get_window_bounds( @@ -469,7 +468,7 @@ def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series": """ Series version of _apply_blockwise """ - _, obj = self._create_blocks(self._selected_obj) + obj = self._create_data(self._selected_obj) try: values = self._prep_values(obj.values) @@ -489,7 +488,7 @@ def _apply_blockwise( if self._selected_obj.ndim == 1: return self._apply_series(homogeneous_func) - _, obj = self._create_blocks(self._selected_obj) + obj = self._create_data(self._selected_obj) mgr = obj._mgr def hfunc(bvalues: ArrayLike) -> ArrayLike: @@ -1268,7 +1267,7 @@ def count(self): # implementations shouldn't end up here assert not isinstance(self.window, BaseIndexer) - _, obj = self._create_blocks(self._selected_obj) + obj = self._create_data(self._selected_obj) def hfunc(values: np.ndarray) -> np.ndarray: result = notna(values) @@ -2234,7 +2233,7 @@ def _apply( def _constructor(self): return Rolling - def _create_blocks(self, obj: FrameOrSeriesUnion): + def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: """ Split data into blocks & return conformed data. """ @@ -2246,7 +2245,7 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): list(self._groupby.grouper.indices.values()) ).astype(np.int64) obj = obj.take(groupby_order) - return super()._create_blocks(obj) + return super()._create_data(obj) def _get_cython_func_type(self, func: str) -> Callable: """ From 070481c3650af926849eb2a01fecd6db20899a5d Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 8 Sep 2020 11:30:36 -0400 Subject: [PATCH 68/71] Make to_numeric default to correct precision (#36149) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/src/parse_helper.h | 4 +- pandas/tests/tools/test_to_numeric.py | 58 +++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2afa1f1a6199e..2aac2596c18cb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -245,7 +245,7 @@ Timezones Numeric ^^^^^^^ -- +- Bug in :func:`to_numeric` where float precision was incorrect (:issue:`31364`) - Conversion diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 2ada0a4bd173d..d161c4e29fe15 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -18,7 +18,9 @@ int to_double(char *item, double *p_value, char sci, char decimal, char *p_end = NULL; int error = 0; - *p_value = xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); + /* Switch to precise xstrtod GH 31364 */ + *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, + &error, maybe_int); return (error == 0) && (!*p_end); } diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 263887a8ea36e..450076f2824ad 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ 
b/pandas/tests/tools/test_to_numeric.py @@ -649,3 +649,61 @@ def test_failure_to_convert_uint64_string_to_NaN(): ser = Series([32, 64, np.nan]) result = to_numeric(pd.Series(["32", "64", "uint64"]), errors="coerce") tm.assert_series_equal(result, ser) + + +@pytest.mark.parametrize( + "strrep", + [ + "243.164", + "245.968", + "249.585", + "259.745", + "265.742", + "272.567", + "279.196", + "280.366", + "275.034", + "271.351", + "272.889", + "270.627", + "280.828", + "290.383", + "308.153", + "319.945", + "336.0", + "344.09", + "351.385", + "356.178", + "359.82", + "361.03", + "367.701", + "380.812", + "387.98", + "391.749", + "391.171", + "385.97", + "385.345", + "386.121", + "390.996", + "399.734", + "413.073", + "421.532", + "430.221", + "437.092", + "439.746", + "446.01", + "451.191", + "460.463", + "469.779", + "472.025", + "479.49", + "474.864", + "467.54", + "471.978", + ], +) +def test_precision_float_conversion(strrep): + # GH 31364 + result = to_numeric(strrep) + + assert result == float(strrep) From 4193e0354e10f23ba2bce9fef2078921465ceead Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Sep 2020 20:23:32 -0700 Subject: [PATCH 69/71] post-rebase fixup --- pandas/core/groupby/generic.py | 61 +++++++++++++++++++++++++++------- pandas/core/groupby/ops.py | 4 +-- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d4e673d2e538c..e07d434684ee7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -75,7 +75,14 @@ group_selection_context, ) from pandas.core.groupby.numba_ import generate_numba_func, split_for_numba -from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + MultiIndex, + PeriodIndex, + TimedeltaIndex, + all_indexes_same, +) import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager from pandas.core.series import Series @@ -257,17 +264,27 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if self.grouper.nkeys > 1: return self._python_agg_general(func, *args, **kwargs) - try: - return self._python_agg_general(func, *args, **kwargs) - except (ValueError, KeyError): - # TODO: KeyError is raised in _python_agg_general, - # see see test_groupby.test_basic - result = self._aggregate_named(func, *args, **kwargs) + if isinstance( + self._selected_obj.index, (DatetimeIndex, TimedeltaIndex, PeriodIndex) + ): + # using _python_agg_general would end up incorrectly patching + # _index_data in reduction.pyx + result = self._aggregate_maybe_named(func, *args, **kwargs) + else: + try: + return self._python_agg_general(func, *args, **kwargs) + except (ValueError, KeyError): + # TODO: KeyError is raised in _python_agg_general, + # see see test_groupby.test_basic + result = self._aggregate_maybe_named(func, *args, **kwargs) + + index = self.grouper.result_index + assert index.name == self.grouper.names[0] - index = Index(sorted(result), name=self.grouper.names[0]) ret = create_series_with_explicit_dtype( result, index=index, dtype_if_empty=object ) + ret.name = self._selected_obj.name # test_metadata_propagation_indiv if not self.as_index: # pragma: no cover print("Warning, ignoring as_index=True") @@ -470,14 +487,34 @@ def _get_index() -> Index: ) return self._reindex_output(result) - def _aggregate_named(self, func, *args, **kwargs): + def _aggregate_maybe_named(self, func, *args, **kwargs): + """ + Try the named-aggregator first, then 
unnamed, which better matches + what libreduction does. + """ + try: + return self._aggregate_named(func, *args, named=True, **kwargs) + except KeyError: + return self._aggregate_named(func, *args, named=False, **kwargs) + + def _aggregate_named(self, func, *args, named: bool = True, **kwargs): result = {} - for name, group in self: - group.name = name + for name, group in self: # TODO: could we have duplicate names? + if named: + group.name = name + output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): - raise ValueError("Must produce aggregated value") + if ( + isinstance(output, Series) + and len(output) == 1 + and name in output.index + ): + # FIXME: kludge for test_resampler_grouper.test_apply + output = output.iloc[0] + else: + raise ValueError("Must produce aggregated value") result[name] = output return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 3ba3c8a0eddc8..955f0463bcccf 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -624,10 +624,10 @@ def agg_series(self, obj: Series, func: F): return self._aggregate_series_pure_python(obj, func) elif obj.index._has_complex_internals or isinstance( - obj.index, (RangeIndex, DatetimeIndex, TimedeltaIndex) + obj.index, (DatetimeIndex, TimedeltaIndex, RangeIndex) ): # Preempt TypeError in _aggregate_series_fast - # exclude RangeIndex because patching it in libreduction would + # exclude RangeIndex/DTI/TDI because patching it in libreduction would # silently be incorrect return self._aggregate_series_pure_python(obj, func) From 816f2fcadb3e7c6ee72ab0693d572530bf7aa203 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Sep 2020 08:05:05 -0700 Subject: [PATCH 70/71] revert whitespace mixup --- doc/source/whatsnew/v1.2.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7afa4eacd9cf0..8b18b56929acd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -118,7 +118,6 @@ Other enhancements - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) - `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`) -- .. 
_whatsnew_120.api_breaking.python: @@ -324,7 +323,6 @@ Plotting - Bug in :meth:`DataFrame.plot` was rotating xticklabels when ``subplots=True``, even if the x-axis wasn't an irregular time series (:issue:`29460`) - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) - Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ From 865cb8baa221bdcb5ce5b5dd7c5595506e2f243b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Sep 2020 13:55:41 -0700 Subject: [PATCH 71/71] Implement _can_use_libreduction --- pandas/core/generic.py | 12 ++++++++++++ pandas/core/groupby/generic.py | 7 +++---- pandas/core/groupby/ops.py | 31 +++++------------------------- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/category.py | 5 ----- pandas/core/indexes/interval.py | 5 ----- pandas/core/indexes/multi.py | 5 ----- pandas/core/indexes/period.py | 5 ----- pandas/tests/groupby/test_apply.py | 1 - 9 files changed, 23 insertions(+), 52 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cc18b8681200f..85ff334e9c7ef 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -404,6 +404,18 @@ def _data(self): # e.g. fastparquet return self._mgr + @property + def _can_use_libreduction(self) -> bool: + # groupby ops can only use libreduction fast-path if we are all-numpy + if self.index._has_complex_internals: + return False + + is_invalid = lambda x: is_extension_array_dtype(x) or x.kind in ["m", "M"] + if self.ndim == 1: + return not is_invalid(self.dtype) + else: + return not self.dtypes.apply(is_invalid).any() + # ---------------------------------------------------------------------- # Axis _stat_axis_number = 0 diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3712f5ab9915d..f65a47c9a9e6c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -277,13 +277,12 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # see see test_groupby.test_basic result = self._aggregate_maybe_named(func, *args, **kwargs) + # name setting -> test_metadata_propagation_indiv index = self.grouper.result_index - assert index.name == self.grouper.names[0] - + obj = self._selected_obj ret = create_series_with_explicit_dtype( - result, index=index, dtype_if_empty=object + result, index=index, dtype_if_empty=object, name=obj.name ) - ret.name = self._selected_obj.name # test_metadata_propagation_indiv if not self.as_index: # pragma: no cover print("Warning, ignoring as_index=True") diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 955f0463bcccf..af1f02adf5331 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -45,14 +45,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import ( - DatetimeIndex, - Index, - MultiIndex, - RangeIndex, - TimedeltaIndex, - ensure_index, -) +from pandas.core.indexes.api import Index, MultiIndex, ensure_index from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -163,18 +156,13 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): result_values = None sdata: FrameOrSeries = splitter._get_sorted_data() - if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): - # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 - # if we pass EA instead of ndarray - # TODO: can we 
have a workaround for EAs backed by ndarray? - pass - - elif ( + if ( com.get_callable_name(f) not in base.plotting_methods and isinstance(splitter, FrameSplitter) and axis == 0 # fast_apply/libreduction doesn't allow non-numpy backed indexes - and not sdata.index._has_complex_internals + # or columns + and sdata._can_use_libreduction ): try: result_values, mutated = splitter.fast_apply(f, sdata, group_keys) @@ -616,19 +604,10 @@ def agg_series(self, obj: Series, func: F): # SeriesGrouper would raise if we were to call _aggregate_series_fast return self._aggregate_series_pure_python(obj, func) - elif is_extension_array_dtype(obj.dtype): + elif not obj._can_use_libreduction: # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider # In the datetime64tz case it would incorrectly cast to tz-naive - # TODO: can we get a performant workaround for EAs backed by ndarray? - return self._aggregate_series_pure_python(obj, func) - - elif obj.index._has_complex_internals or isinstance( - obj.index, (DatetimeIndex, TimedeltaIndex, RangeIndex) - ): - # Preempt TypeError in _aggregate_series_fast - # exclude RangeIndex/DTI/TDI because patching it in libreduction would - # silently be incorrect return self._aggregate_series_pure_python(obj, func) try: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 11490e2e0be29..07e4bfba2313e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4053,7 +4053,9 @@ def _has_complex_internals(self) -> bool: Indicates if an index is not directly backed by a numpy array """ # used to avoid libreduction code paths, which raise or require conversion - return False + return isinstance(self, (ABCMultiIndex, ABCRangeIndex)) or not isinstance( + self._data, np.ndarray + ) def _is_memory_usage_qualified(self) -> bool: """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c798ae0bd4e4d..77a55bced2187 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -343,11 +343,6 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - @doc(Index.__contains__) def __contains__(self, key: Any) -> bool: # if key is a NaN, check if any NaN is in self. 
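
For context on the _can_use_libreduction property introduced in pandas/core/generic.py above, a rough illustration of its dtype gate (an illustrative sketch only; the actual property also consults index._has_complex_internals, and for a DataFrame applies the check across all column dtypes). Datetime-like and extension dtypes are routed away from the fast path because libreduction's Slider assumes plain numpy ndarrays; the public pandas.api.types accessor is used here just to keep the snippet self-contained:

    import pandas as pd
    from pandas.api.types import is_extension_array_dtype

    def is_invalid(dtype):
        # mirrors the lambda in the patch: extension dtypes and
        # datetime64/timedelta64 (kind "M"/"m") disable the fast path
        return is_extension_array_dtype(dtype) or dtype.kind in ["m", "M"]

    print(is_invalid(pd.Series([1, 2, 3]).dtype))                         # False
    print(is_invalid(pd.Series(pd.date_range("2020", periods=3)).dtype))  # True
    print(is_invalid(pd.Series([1, 2], dtype="Int64").dtype))             # True
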
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9ef584f5b7fbc..2176d7419557e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -394,11 +394,6 @@ def values(self) -> IntervalArray: """ return self._data - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - def __array_wrap__(self, result, context=None): # we don't want the superclass implementation return result diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a21a54e4a9be3..561402a79fa27 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1508,11 +1508,6 @@ def _get_level_number(self, level) -> int: ) from err return level - @property - def _has_complex_internals(self) -> bool: - # used to avoid libreduction code paths, which raise or require conversion - return True - @cache_readonly def is_monotonic_increasing(self) -> bool: """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 42dce1bd53f22..6213aa5b71674 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -247,11 +247,6 @@ def _simple_new(cls, values: PeriodArray, name: Label = None): def values(self): return np.asarray(self) - @property - def _has_complex_internals(self): - # used to avoid libreduction code paths, which raise or require conversion - return True - def _shallow_copy(self, values=None, name: Label = no_default): name = name if name is not no_default else self.name cache = self._cache.copy() if values is None else {} diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index db5c4af9c6f53..93761a186b804 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1002,7 +1002,6 @@ def test_apply_function_with_indexing_return_column(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="GH-34998") def test_apply_with_timezones_aware(): # GH: 27212