From 1a056df275ea602976c1dd3f8ccbe8295c9bd042 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 31 Dec 2022 17:56:19 +0000
Subject: [PATCH 01/13] Revert "STYLE use pandas-dev-flaker (#40906)"

This reverts commit 1f27ed0adc2fea2dd301d2dccab5215afaca0e6b.
---
 .pre-commit-config.yaml                       |  96 +++-
 asv_bench/benchmarks/pandas_vb_common.py      |   2 +-
 ci/code_checks.sh                             |  71 ++-
 doc/source/development/code_style.rst         | 218 ++++++++
 environment.yml                               |   1 -
 pandas/_testing/__init__.py                   |   2 +-
 pandas/tests/api/test_api.py                  |   2 +-
 pandas/tests/io/pytables/test_store.py        |   2 +-
 pandas/tests/tools/test_to_numeric.py         |   4 +-
 requirements-dev.txt                          |   1 -
 ...check_for_inconsistent_pandas_namespace.py | 142 +++++
 .../test_inconsistent_namespace_check.py      |  61 +++
 scripts/tests/test_use_pd_array_in_core.py    |   2 +-
 .../tests/test_validate_unwanted_patterns.py  | 419 +++++++++++++++
 scripts/validate_unwanted_patterns.py         | 487 ++++++++++++++++++
 15 files changed, 1497 insertions(+), 13 deletions(-)
 create mode 100644 doc/source/development/code_style.rst
 create mode 100644 scripts/check_for_inconsistent_pandas_namespace.py
 create mode 100644 scripts/tests/test_inconsistent_namespace_check.py
 create mode 100644 scripts/tests/test_validate_unwanted_patterns.py
 create mode 100755 scripts/validate_unwanted_patterns.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f3158e64df8dd..f33353d8f6a38 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -70,12 +70,9 @@ repos:
     rev: 6.0.0
     hooks:
     -   id: flake8
-        # Need to patch os.remove rule in pandas-dev-flaker
-        exclude: ^ci/fix_wheels.py
         additional_dependencies: &flake8_dependencies
         - flake8==6.0.0
         - flake8-bugbear==22.7.1
-        - pandas-dev-flaker==0.5.0
 -   repo: https://github.com/pycqa/pylint
     rev: v2.15.6
     hooks:
@@ -184,6 +181,28 @@ repos:
         types: [rst]
         args: [--filename=*.rst]
         additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
+    -   id: frame-or-series-union
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: inconsistent-namespace-usage
+        name: 'Check for inconsistent use of pandas namespace'
+        entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        language: python
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/excel/test_writers\.py
+            |pandas/tests/io/pytables/common\.py
+            |pandas/tests/io/pytables/test_store\.py$
     -   id: unwanted-patterns
         name: Unwanted patterns
         language: pygrep
@@ -193,6 +212,24 @@ repos:
             \#\ type:\ (?!ignore)
             |\#\ type:\s?ignore(?!\[)
 
+            # foo._class__ instead of type(foo)
+            |\.__class__
+
+            # np.bool/np.object instead of np.bool_/np.object_
+            |np\.bool[^_8]
+            |np\.object[^_8]
+
+            # imports from pandas.core.common instead of `import pandas.core.common as com`
+            |from\ pandas\.core\.common\ import
+            |from\ pandas\.core\ import\ common
+
+            # imports from collections.abc instead of `from collections import abc`
+            |from\ collections\.abc\ import
+
+            # Numpy
+            |from\ numpy\ import\ random
+            |from\ numpy\.random\ import
+
             # Incorrect code-block / IPython directives
             |\.\.\ code-block\ ::
             |\.\.\ ipython\ ::
@@ -202,6 +239,7 @@ repos:
             # Check for deprecated messages without sphinx directive
             |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
         types_or: [python, cython, rst]
+        exclude: ^doc/source/development/code_style\.rst  # contains examples of patterns to avoid
     -   id: cython-casting
         name: Check Cython casting is `<type>obj`, not `<type> obj`
         language: pygrep
@@ -232,6 +270,29 @@ repos:
         files: ^pandas/tests/extension/base
         types: [python]
         exclude: ^pandas/tests/extension/base/base\.py
+    -   id: unwanted-patterns-in-tests
+        name: Unwanted patterns in tests
+        language: pygrep
+        entry: |
+            (?x)
+            # pytest.xfail instead of pytest.mark.xfail
+            pytest\.xfail
+
+            # imports from pandas._testing instead of `import pandas._testing as tm`
+            |from\ pandas\._testing\ import
+            |from\ pandas\ import\ _testing\ as\ tm
+
+            # No direct imports from conftest
+            |conftest\ import
+            |import\ conftest
+
+            # pandas.testing instead of tm
+            |pd\.testing\.
+
+            # pd.api.types instead of from pandas.api.types import ...
+            |(pd|pandas)\.api\.types\.
+        files: ^pandas/tests/
+        types_or: [python, cython, rst]
     -   id: pip-to-conda
         name: Generate pip dependency from conda
         language: python
@@ -252,6 +313,35 @@ repos:
         language: python
         types: [rst]
         files: ^doc/source/(development|reference)/
+    -   id: unwanted-patterns-bare-pytest-raises
+        name: Check for use of bare pytest raises
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
+        types: [python]
+        files: ^pandas/tests/
+        exclude: ^pandas/tests/extension/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-strings-to-concatenate
+        name: Check for use of not concatenated strings
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
+        types_or: [python, cython]
+    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
+        name: Check for strings with wrong placed spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        types_or: [python, cython]
     -   id: use-pd_array-in-core
         name: Import pandas.array as pd_array in core
         language: python
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index d3168bde0a783..97d91111e833a 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -70,7 +70,7 @@ class BaseIO:
     def remove(self, f):
         """Remove created files"""
         try:
-            os.remove(f)  # noqa: PDF008
+            os.remove(f)
         except OSError:
             # On Windows, attempting to remove a file that is in use
             # causes an exception to be raised
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 3c1362b1ac83e..228cc5518495e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -8,14 +8,16 @@
 #
 # Usage:
 #   $ ./ci/code_checks.sh               # run all checks
+#   $ ./ci/code_checks.sh lint          # run linting only
+#   $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
 #   $ ./ci/code_checks.sh code          # checks on imported code
 #   $ ./ci/code_checks.sh doctests      # run doctests
 #   $ ./ci/code_checks.sh docstrings    # validate docstring errors
 #   $ ./ci/code_checks.sh single-docs   # check single-page docs build warning-free
 #   $ ./ci/code_checks.sh notebooks     # check execution of documentation notebooks
 
-[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
-    { echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
+[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
+    { echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
 
 BASE_DIR="$(dirname $0)/.."
 RET=0
@@ -38,6 +40,71 @@ if [[ "$GITHUB_ACTIONS" == "true" ]]; then
     INVGREP_PREPEND="##[error]"
 fi
 
+### LINTING ###
+if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
+
+    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
+    # it doesn't make a difference, but we want to be internally consistent.
+    # Note: this grep pattern is (intended to be) equivalent to the python
+    # regex r'(?<![ ->])> '
+    MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG
+    invgrep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # readability/casting: Warnings about C casting instead of C++ casting
+    # runtime/int: Warnings about using C number types instead of C++ ones
+    # build/include_subdir: Warnings about prefacing included header files with directory
+
+fi
+
+### PATTERNS ###
+if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
+
+    MSG='Check for use of exec' ; echo $MSG
+    invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for pytest warns' ; echo $MSG
+    invgrep -r -E --include '*.py' 'pytest\.warns' pandas/tests/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for pytest raises without context' ; echo $MSG
+    invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for use of builtin filter function' ; echo $MSG
+    invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # Check for the following code in testing: `np.testing` and `np.array_equal`
+    MSG='Check for invalid testing' ; echo $MSG
+    invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
+    MSG='Check for invalid EA testing' ; echo $MSG
+    invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for deprecated messages without sphinx directive' ; echo $MSG
+    invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
+    invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    # Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
+    MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
+    invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} '!r}' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+    echo $MSG "DONE"
+fi
+
 ### CODE ###
 if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
 
diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst
new file mode 100644
index 0000000000000..19d83eb75e5bd
--- /dev/null
+++ b/doc/source/development/code_style.rst
@@ -0,0 +1,218 @@
+.. _code_style:
+
+{{ header }}
+
+=======================
+pandas code style guide
+=======================
+
+.. contents:: Table of contents:
+   :local:
+
+pandas follows the `PEP8 <https://www.python.org/dev/peps/pep-0008/>`_
+standard and uses `Black <https://black.readthedocs.io/en/stable/>`_
+and `Flake8 <https://flake8.pycqa.org/en/latest/>`_ to ensure a
+consistent code format throughout the project. We encourage you to use
+:ref:`pre-commit <contributing.pre-commit>` to automatically run ``black``,
+``flake8``, ``isort``, and related code checks when you make a git commit.
+
+Patterns
+========
+
+Using foo.__class__
+-------------------
+
+
+pandas uses 'type(foo)' instead 'foo.__class__' as it is making the code more
+readable.
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+    foo = "bar"
+    type(foo)
+
+**Bad:**
+
+.. code-block:: python
+
+    foo = "bar"
+    foo.__class__
+
+
+String formatting
+=================
+
+Concatenated strings
+--------------------
+
+Using f-strings
+~~~~~~~~~~~~~~~
+
+pandas uses f-strings formatting instead of '%' and '.format()' string formatters.
+
+The convention of using f-strings on a string that is concatenated over several lines,
+is to prefix only the lines containing values which need to be interpreted.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+    foo = "old_function"
+    bar = "new_function"
+
+    my_warning_message = (
+        f"Warning, {foo} is deprecated, "
+        "please use the new and way better "
+        f"{bar}"
+    )
+
+**Bad:**
+
+.. code-block:: python
+
+    foo = "old_function"
+    bar = "new_function"
+
+    my_warning_message = (
+        f"Warning, {foo} is deprecated, "
+        f"please use the new and way better "
+        f"{bar}"
+    )
+
+White spaces
+~~~~~~~~~~~~
+
+Only put white space at the end of the previous line, so
+there is no whitespace at the beginning of the concatenated string.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+    example_string = (
+        "Some long concatenated string, "
+        "with good placement of the "
+        "whitespaces"
+    )
+
+**Bad:**
+
+.. code-block:: python
+
+    example_string = (
+        "Some long concatenated string,"
+        " with bad placement of the"
+        " whitespaces"
+    )
+
+Representation function (aka 'repr()')
+--------------------------------------
+
+pandas uses 'repr()' instead of '%r' and '!r'.
+
+The use of 'repr()' will only happen when the value is not an obvious string.
+
+For example:
+
+**Good:**
+
+.. code-block:: python
+
+    value = str
+    f"Unknown received value, got: {repr(value)}"
+
+**Good:**
+
+.. code-block:: python
+
+    value = str
+    f"Unknown received type, got: '{type(value).__name__}'"
+
+
+Imports (aim for absolute)
+==========================
+
+In Python 3, absolute imports are recommended. Using absolute imports, doing something
+like ``import string`` will import the string module rather than ``string.py``
+in the same directory. As much as possible, you should try to write out
+absolute imports that show the whole import chain from top-level pandas.
+
+Explicit relative imports are also supported in Python 3 but it is not
+recommended to use them. Implicit relative imports should never be used
+and are removed in Python 3.
+
+For example:
+
+::
+
+    # preferred
+    import pandas.core.common as com
+
+    # not preferred
+    from .common import test_base
+
+    # wrong
+    from common import test_base
+
+Testing
+=======
+
+Failing tests
+--------------
+
+See https://docs.pytest.org/en/latest/skipping.html for background.
+
+Do not use ``pytest.xfail``
+---------------------------
+
+Do not use this method. It has the same behavior as ``pytest.skip``, namely
+it immediately stops the test and does not check if the test will fail. If
+this is the behavior you desire, use ``pytest.skip`` instead.
+
+Using ``pytest.mark.xfail``
+---------------------------
+
+Use this method if a test is known to fail but the manner in which it fails
+is not meant to be captured. It is common to use this method for a test that
+exhibits buggy behavior or a non-implemented feature. If
+the failing test has flaky behavior, use the argument ``strict=False``. This
+will make it so pytest does not fail if the test happens to pass.
+
+Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
+over usage within a test so that the test is appropriately marked during the
+collection phase of pytest. For xfailing a test that involves multiple
+parameters, a fixture, or a combination of these, it is only possible to
+xfail during the testing phase. To do so, use the ``request`` fixture:
+
+.. code-block:: python
+
+    import pytest
+
+    def test_xfail(request):
+        request.node.add_marker(pytest.mark.xfail(reason="Indicate why here"))
+
+xfail is not to be used for tests involving failure due to invalid user arguments.
+For these tests, we need to verify the correct exception type and error message
+is being raised, using ``pytest.raises`` instead.
+
+Miscellaneous
+=============
+
+Reading from a url
+------------------
+
+**Good:**
+
+.. code-block:: python
+
+    from pandas.io.common import urlopen
+
+    with urlopen("http://www.google.com") as url:
+        raw_text = url.read()
diff --git a/environment.yml b/environment.yml
index b6b8f7d6af1ba..9eaf52977d6d2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -90,7 +90,6 @@ dependencies:
   - gitdb
   - natsort  # DataFrame.sort_values doctest
   - numpydoc
-  - pandas-dev-flaker=0.5.0
   - pydata-sphinx-theme<0.11
   - pytest-cython  # doctest
   - sphinx
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 43020ae471f10..eb2905751a9b4 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -886,7 +886,7 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
     """
     import pytest
 
-    return pytest.raises(expected_exception, match=None)  # noqa: PDF010
+    return pytest.raises(expected_exception, match=None)
 
 
 cython_table = pd.core.common._cython_table.items()
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index 995b1668046d2..e448e1bce9146 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -250,7 +250,7 @@ class TestTesting(Base):
     ]
 
     def test_testing(self):
-        from pandas import testing  # noqa: PDF015
+        from pandas import testing
 
         self.check(testing, self.funcs)
 
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 1263d61b55cd5..2664c7df59223 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -911,7 +911,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):
                     os.close(fd)
                 except (OSError, ValueError):
                     pass
-                os.remove(new_f)  # noqa: PDF008
+                os.remove(new_f)
 
         # new table
         df = tm.makeDataFrame()
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 1347f6eb50b09..a95cc41f6397d 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -234,7 +234,9 @@ def test_type_check(errors):
     # see gh-11776
     df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
     kwargs = {"errors": errors} if errors is not None else {}
-    with pytest.raises(TypeError, match="1-d array"):
+    error_ctx = pytest.raises(TypeError, match="1-d array")
+
+    with error_ctx:
         to_numeric(df, **kwargs)
 
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4f2a80d932fd0..428c2cb92d040 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -65,7 +65,6 @@ gitpython
 gitdb
 natsort
 numpydoc
-pandas-dev-flaker==0.5.0
 pydata-sphinx-theme<0.11
 pytest-cython
 sphinx
diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py
new file mode 100644
index 0000000000000..3c21821e794a9
--- /dev/null
+++ b/scripts/check_for_inconsistent_pandas_namespace.py
@@ -0,0 +1,142 @@
+"""
+Check that test suite file doesn't use the pandas namespace inconsistently.
+
+We check for cases of ``Series`` and ``pd.Series`` appearing in the same file
+(likewise for other pandas objects).
+
+This is meant to be run as a pre-commit hook - to run it manually, you can do:
+
+    pre-commit run inconsistent-namespace-usage --all-files
+
+To automatically fixup a given file, you can pass `--replace`, e.g.
+
+    python scripts/check_for_inconsistent_pandas_namespace.py test_me.py --replace
+
+though note that you may need to manually fixup some imports and that you will also
+need the additional dependency `tokenize-rt` (which is left out from the pre-commit
+hook so that it uses the same virtualenv as the other local ones).
+
+The general structure is similar to that of some plugins from
+https://github.com/asottile/pyupgrade .
+"""
+
+import argparse
+import ast
+import sys
+from typing import (
+    MutableMapping,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Set,
+)
+
+ERROR_MESSAGE = (
+    "{path}:{lineno}:{col_offset}: "
+    "Found both '{prefix}.{name}' and '{name}' in {path}"
+)
+
+
+class OffsetWithNamespace(NamedTuple):
+    lineno: int
+    col_offset: int
+    namespace: str
+
+
+class Visitor(ast.NodeVisitor):
+    def __init__(self) -> None:
+        self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {}
+        self.imported_from_pandas: Set[str] = set()
+
+    def visit_Attribute(self, node: ast.Attribute) -> None:
+        if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}:
+            offset_with_namespace = OffsetWithNamespace(
+                node.lineno, node.col_offset, node.value.id
+            )
+            self.pandas_namespace[offset_with_namespace] = node.attr
+        self.generic_visit(node)
+
+    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
+        if node.module is not None and "pandas" in node.module:
+            self.imported_from_pandas.update(name.name for name in node.names)
+        self.generic_visit(node)
+
+
+def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str:
+    from tokenize_rt import (
+        reversed_enumerate,
+        src_to_tokens,
+        tokens_to_src,
+    )
+
+    tokens = src_to_tokens(content)
+    for n, i in reversed_enumerate(tokens):
+        offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src)
+        if (
+            offset_with_namespace in visitor.pandas_namespace
+            and visitor.pandas_namespace[offset_with_namespace]
+            in visitor.imported_from_pandas
+        ):
+            # Replace `pd`
+            tokens[n] = i._replace(src="")
+            # Replace `.`
+            tokens[n + 1] = tokens[n + 1]._replace(src="")
+
+    new_src: str = tokens_to_src(tokens)
+    return new_src
+
+
+def check_for_inconsistent_pandas_namespace(
+    content: str, path: str, *, replace: bool
+) -> Optional[str]:
+    tree = ast.parse(content)
+
+    visitor = Visitor()
+    visitor.visit(tree)
+
+    inconsistencies = visitor.imported_from_pandas.intersection(
+        visitor.pandas_namespace.values()
+    )
+
+    if not inconsistencies:
+        # No inconsistent namespace usage, nothing to replace.
+        return None
+
+    if not replace:
+        inconsistency = inconsistencies.pop()
+        lineno, col_offset, prefix = next(
+            key for key, val in visitor.pandas_namespace.items() if val == inconsistency
+        )
+        msg = ERROR_MESSAGE.format(
+            lineno=lineno,
+            col_offset=col_offset,
+            prefix=prefix,
+            name=inconsistency,
+            path=path,
+        )
+        sys.stdout.write(msg)
+        sys.exit(1)
+
+    return replace_inconsistent_pandas_namespace(visitor, content)
+
+
+def main(argv: Optional[Sequence[str]] = None) -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("paths", nargs="*")
+    parser.add_argument("--replace", action="store_true")
+    args = parser.parse_args(argv)
+
+    for path in args.paths:
+        with open(path, encoding="utf-8") as fd:
+            content = fd.read()
+        new_content = check_for_inconsistent_pandas_namespace(
+            content, path, replace=args.replace
+        )
+        if not args.replace or new_content is None:
+            continue
+        with open(path, "w", encoding="utf-8") as fd:
+            fd.write(new_content)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py
new file mode 100644
index 0000000000000..eb995158d8cb4
--- /dev/null
+++ b/scripts/tests/test_inconsistent_namespace_check.py
@@ -0,0 +1,61 @@
+import pytest
+
+from ..check_for_inconsistent_pandas_namespace import (
+    check_for_inconsistent_pandas_namespace,
+)
+
+BAD_FILE_0 = (
+    "from pandas import Categorical\n"
+    "cat_0 = Categorical()\n"
+    "cat_1 = pd.Categorical()"
+)
+BAD_FILE_1 = (
+    "from pandas import Categorical\n"
+    "cat_0 = pd.Categorical()\n"
+    "cat_1 = Categorical()"
+)
+BAD_FILE_2 = (
+    "from pandas import Categorical\n"
+    "cat_0 = pandas.Categorical()\n"
+    "cat_1 = Categorical()"
+)
+GOOD_FILE_0 = (
+    "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
+)
+GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()"
+GOOD_FILE_2 = "from array import array\nimport pandas as pd\narr = pd.array([])"
+PATH = "t.py"
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        (BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
+        (BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"),
+        (
+            BAD_FILE_2,
+            "t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py",
+        ),
+    ],
+)
+def test_inconsistent_usage(content, expected, capsys):
+    with pytest.raises(SystemExit):
+        check_for_inconsistent_pandas_namespace(content, PATH, replace=False)
+    result, _ = capsys.readouterr()
+    assert result == expected
+
+
+@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1, GOOD_FILE_2])
+@pytest.mark.parametrize("replace", [True, False])
+def test_consistent_usage(content, replace):
+    # should not raise
+    check_for_inconsistent_pandas_namespace(content, PATH, replace=replace)
+
+
+@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2])
+def test_inconsistent_usage_with_replace(content):
+    result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True)
+    expected = (
+        "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()"
+    )
+    assert result == expected
diff --git a/scripts/tests/test_use_pd_array_in_core.py b/scripts/tests/test_use_pd_array_in_core.py
index 8f13a6e735899..9c66199a82846 100644
--- a/scripts/tests/test_use_pd_array_in_core.py
+++ b/scripts/tests/test_use_pd_array_in_core.py
@@ -14,7 +14,7 @@ def test_inconsistent_usage(content, capsys):
     result_msg = (
         "t.py:2:0: Don't use pd.array in core, import array as pd_array instead\n"
     )
-    with pytest.raises(SystemExit, match=None):
+    with pytest.raises(SystemExit):
         use_pd_array(content, PATH)
     expected_msg, _ = capsys.readouterr()
     assert result_msg == expected_msg
diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py
new file mode 100644
index 0000000000000..ef93fd1d21981
--- /dev/null
+++ b/scripts/tests/test_validate_unwanted_patterns.py
@@ -0,0 +1,419 @@
+import io
+
+import pytest
+
+from .. import validate_unwanted_patterns
+
+
+class TestBarePytestRaises:
+    @pytest.mark.parametrize(
+        "data",
+        [
+            (
+                """
+    with pytest.raises(ValueError, match="foo"):
+        pass
+    """
+            ),
+            (
+                """
+    # with pytest.raises(ValueError, match="foo"):
+    #    pass
+    """
+            ),
+            (
+                """
+    # with pytest.raises(ValueError):
+    #    pass
+    """
+            ),
+            (
+                """
+    with pytest.raises(
+        ValueError,
+        match="foo"
+    ):
+        pass
+    """
+            ),
+        ],
+    )
+    def test_pytest_raises(self, data):
+        fd = io.StringIO(data.strip())
+        result = list(validate_unwanted_patterns.bare_pytest_raises(fd))
+        assert result == []
+
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (
+                (
+                    """
+    with pytest.raises(ValueError):
+        pass
+    """
+                ),
+                [
+                    (
+                        1,
+                        (
+                            "Bare pytests raise have been found. "
+                            "Please pass in the argument 'match' "
+                            "as well the exception."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    with pytest.raises(ValueError, match="foo"):
+        with pytest.raises(ValueError):
+            pass
+        pass
+    """
+                ),
+                [
+                    (
+                        2,
+                        (
+                            "Bare pytests raise have been found. "
+                            "Please pass in the argument 'match' "
+                            "as well the exception."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match="foo"):
+            pass
+        pass
+    """
+                ),
+                [
+                    (
+                        1,
+                        (
+                            "Bare pytests raise have been found. "
+                            "Please pass in the argument 'match' "
+                            "as well the exception."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    with pytest.raises(
+        ValueError
+    ):
+        pass
+    """
+                ),
+                [
+                    (
+                        1,
+                        (
+                            "Bare pytests raise have been found. "
+                            "Please pass in the argument 'match' "
+                            "as well the exception."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    with pytest.raises(
+        ValueError,
+        # match = "foo"
+    ):
+        pass
+    """
+                ),
+                [
+                    (
+                        1,
+                        (
+                            "Bare pytests raise have been found. "
+                            "Please pass in the argument 'match' "
+                            "as well the exception."
+                        ),
+                    ),
+                ],
+            ),
+        ],
+    )
+    def test_pytest_raises_raises(self, data, expected):
+        fd = io.StringIO(data.strip())
+        result = list(validate_unwanted_patterns.bare_pytest_raises(fd))
+        assert result == expected
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        (
+            'msg = ("bar " "baz")',
+            [
+                (
+                    1,
+                    (
+                        "String unnecessarily split in two by black. "
+                        "Please merge them manually."
+                    ),
+                )
+            ],
+        ),
+        (
+            'msg = ("foo " "bar " "baz")',
+            [
+                (
+                    1,
+                    (
+                        "String unnecessarily split in two by black. "
+                        "Please merge them manually."
+                    ),
+                ),
+                (
+                    1,
+                    (
+                        "String unnecessarily split in two by black. "
+                        "Please merge them manually."
+                    ),
+                ),
+            ],
+        ),
+    ],
+)
+def test_strings_to_concatenate(data, expected):
+    fd = io.StringIO(data.strip())
+    result = list(validate_unwanted_patterns.strings_to_concatenate(fd))
+    assert result == expected
+
+
+class TestStringsWithWrongPlacedWhitespace:
+    @pytest.mark.parametrize(
+        "data",
+        [
+            (
+                """
+    msg = (
+        "foo\n"
+        " bar"
+    )
+    """
+            ),
+            (
+                """
+    msg = (
+        "foo"
+        "  bar"
+        "baz"
+    )
+    """
+            ),
+            (
+                """
+    msg = (
+        f"foo"
+        "  bar"
+    )
+    """
+            ),
+            (
+                """
+    msg = (
+        "foo"
+        f"  bar"
+    )
+    """
+            ),
+            (
+                """
+    msg = (
+        "foo"
+        rf"  bar"
+    )
+    """
+            ),
+        ],
+    )
+    def test_strings_with_wrong_placed_whitespace(self, data):
+        fd = io.StringIO(data.strip())
+        result = list(
+            validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd)
+        )
+        assert result == []
+
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        " bar"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    )
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        f"foo"
+        " bar"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    )
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        f" bar"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    )
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        f"foo"
+        f" bar"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    )
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        rf" bar"
+        " baz"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                    (
+                        4,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        " bar"
+        rf" baz"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                    (
+                        4,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        rf" bar"
+        rf" baz"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                    (
+                        4,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                ],
+            ),
+        ],
+    )
+    def test_strings_with_wrong_placed_whitespace_raises(self, data, expected):
+        fd = io.StringIO(data.strip())
+        result = list(
+            validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd)
+        )
+        assert result == expected
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
new file mode 100755
index 0000000000000..f90fd6c3e8a3e
--- /dev/null
+++ b/scripts/validate_unwanted_patterns.py
@@ -0,0 +1,487 @@
+#!/usr/bin/env python3
+"""
+Unwanted patterns test cases.
+
+The reason this file exist despite the fact we already have
+`ci/code_checks.sh`,
+(see https://github.com/pandas-dev/pandas/blob/master/ci/code_checks.sh)
+
+is that some of the test cases are more complex/impossible to validate via regex.
+So this file is somewhat an extensions to `ci/code_checks.sh`
+"""
+
+import argparse
+import ast
+import sys
+import token
+import tokenize
+from typing import (
+    IO,
+    Callable,
+    Iterable,
+    List,
+    Set,
+    Tuple,
+)
+
+PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
+    "_extension_array_shared_docs",
+    "_index_shared_docs",
+    "_interval_shared_docs",
+    "_merge_doc",
+    "_shared_docs",
+    "_apply_docs",
+    "_new_Index",
+    "_new_PeriodIndex",
+    "_doc_template",
+    "_agg_template",
+    "_pipe_template",
+    "__main__",
+    "_transform_template",
+    "_flex_comp_doc_FRAME",
+    "_op_descriptions",
+    "_IntegerDtype",
+    "_use_inf_as_na",
+    "_get_plot_backend",
+    "_matplotlib",
+    "_arrow_utils",
+    "_registry",
+    "_get_offset",  # TODO: remove after get_offset deprecation enforced
+    "_test_parse_iso8601",
+    "_json_normalize",  # TODO: remove after deprecation is enforced
+    "_testing",
+    "_test_decorators",
+    "__version__",  # check np.__version__ in compat.numpy.function
+}
+
+
+def _get_literal_string_prefix_len(token_string: str) -> int:
+    """
+    Getting the length of the literal string prefix.
+
+    Parameters
+    ----------
+    token_string : str
+        String to check.
+
+    Returns
+    -------
+    int
+        Length of the literal string prefix.
+
+    Examples
+    --------
+    >>> example_string = "'Hello world'"
+    >>> _get_literal_string_prefix_len(example_string)
+    0
+    >>> example_string = "r'Hello world'"
+    >>> _get_literal_string_prefix_len(example_string)
+    1
+    """
+    try:
+        return min(
+            token_string.find(quote)
+            for quote in (r"'", r'"')
+            if token_string.find(quote) >= 0
+        )
+    except ValueError:
+        return 0
+
+
+def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Test Case for bare pytest raises.
+
+    For example, this is wrong:
+
+    >>> with pytest.raise(ValueError):
+    ...     # Some code that raises ValueError
+
+    And this is what we want instead:
+
+    >>> with pytest.raise(ValueError, match="foo"):
+    ...     # Some code that raises ValueError
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+
+    Yields
+    ------
+    line_number : int
+        Line number of unconcatenated string.
+    msg : str
+        Explanation of the error.
+
+    Notes
+    -----
+    GH #23922
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    for node in ast.walk(tree):
+        if not isinstance(node, ast.Call):
+            continue
+
+        try:
+            if not (node.func.value.id == "pytest" and node.func.attr == "raises"):
+                continue
+        except AttributeError:
+            continue
+
+        if not node.keywords:
+            yield (
+                node.lineno,
+                "Bare pytests raise have been found. "
+                "Please pass in the argument 'match' as well the exception.",
+            )
+        else:
+            # Means that there are arguments that are being passed in,
+            # now we validate that `match` is one of the passed in arguments
+            if not any(keyword.arg == "match" for keyword in node.keywords):
+                yield (
+                    node.lineno,
+                    "Bare pytests raise have been found. "
+                    "Please pass in the argument 'match' as well the exception.",
+                )
+
+
+PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"}  # no known alternative
+
+
+def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Checking that a private function is not used across modules.
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+    Yields
+    ------
+    line_number : int
+        Line number of the private function that is used across modules.
+    msg : str
+        Explanation of the error.
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    imported_modules: Set[str] = set()
+
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.Import, ast.ImportFrom)):
+            for module in node.names:
+                module_fqdn = module.name if module.asname is None else module.asname
+                imported_modules.add(module_fqdn)
+
+        if not isinstance(node, ast.Call):
+            continue
+
+        try:
+            module_name = node.func.value.id
+            function_name = node.func.attr
+        except AttributeError:
+            continue
+
+        # Exception section #
+
+        # (Debatable) Class case
+        if module_name[0].isupper():
+            continue
+        # (Debatable) Dunder methods case
+        elif function_name.startswith("__") and function_name.endswith("__"):
+            continue
+        elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED:
+            continue
+
+        if module_name in imported_modules and function_name.startswith("_"):
+            yield (node.lineno, f"Private function '{module_name}.{function_name}'")
+
+
+def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Checking that a private function is not imported across modules.
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+    Yields
+    ------
+    line_number : int
+        Line number of import statement, that imports the private function.
+    msg : str
+        Explanation of the error.
+    """
+    contents = file_obj.read()
+    tree = ast.parse(contents)
+
+    for node in ast.walk(tree):
+        if not (isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)):
+            continue
+
+        for module in node.names:
+            module_name = module.name.split(".")[-1]
+            if module_name in PRIVATE_IMPORTS_TO_IGNORE:
+                continue
+
+            if module_name.startswith("_"):
+                yield (node.lineno, f"Import of internal function {repr(module_name)}")
+
+
+def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    This test case is necessary after 'Black' (https://github.com/psf/black),
+    is formatting strings over multiple lines.
+
+    For example, when this:
+
+    >>> foo = (
+    ...     "bar "
+    ...     "baz"
+    ... )
+
+    Is becoming this:
+
+    >>> foo = ("bar " "baz")
+
+    'Black' is not considering this as an
+    issue (see https://github.com/psf/black/issues/1051),
+    so we are checking it here instead.
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+
+    Yields
+    ------
+    line_number : int
+        Line number of unconcatenated string.
+    msg : str
+        Explanation of the error.
+
+    Notes
+    -----
+    GH #30454
+    """
+    tokens: List = list(tokenize.generate_tokens(file_obj.readline))
+
+    for current_token, next_token in zip(tokens, tokens[1:]):
+        if current_token.type == next_token.type == token.STRING:
+            yield (
+                current_token.start[0],
+                (
+                    "String unnecessarily split in two by black. "
+                    "Please merge them manually."
+                ),
+            )
+
+
+def strings_with_wrong_placed_whitespace(
+    file_obj: IO[str],
+) -> Iterable[Tuple[int, str]]:
+    """
+    Test case for leading spaces in concated strings.
+
+    For example:
+
+    >>> rule = (
+    ...    "We want the space at the end of the line, "
+    ...    "not at the beginning"
+    ... )
+
+    Instead of:
+
+    >>> rule = (
+    ...    "We want the space at the end of the line,"
+    ...    " not at the beginning"
+    ... )
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+
+    Yields
+    ------
+    line_number : int
+        Line number of unconcatenated string.
+    msg : str
+        Explanation of the error.
+    """
+
+    def has_wrong_whitespace(first_line: str, second_line: str) -> bool:
+        """
+        Checking if the two lines are mattching the unwanted pattern.
+
+        Parameters
+        ----------
+        first_line : str
+            First line to check.
+        second_line : str
+            Second line to check.
+
+        Returns
+        -------
+        bool
+            True if the two received string match, an unwanted pattern.
+
+        Notes
+        -----
+        The unwanted pattern that we are trying to catch is if the spaces in
+        a string that is concatenated over multiple lines are placed at the
+        end of each string, unless this string is ending with a
+        newline character (\n).
+
+        For example, this is bad:
+
+        >>> rule = (
+        ...    "We want the space at the end of the line,"
+        ...    " not at the beginning"
+        ... )
+
+        And what we want is:
+
+        >>> rule = (
+        ...    "We want the space at the end of the line, "
+        ...    "not at the beginning"
+        ... )
+
+        And if the string is ending with a new line character (\n) we
+        do not want any trailing whitespaces after it.
+
+        For example, this is bad:
+
+        >>> rule = (
+        ...    "We want the space at the begging of "
+        ...    "the line if the previous line is ending with a \n "
+        ...    "not at the end, like always"
+        ... )
+
+        And what we do want is:
+
+        >>> rule = (
+        ...    "We want the space at the begging of "
+        ...    "the line if the previous line is ending with a \n"
+        ...    " not at the end, like always"
+        ... )
+        """
+        if first_line.endswith(r"\n"):
+            return False
+        elif first_line.startswith("  ") or second_line.startswith("  "):
+            return False
+        elif first_line.endswith("  ") or second_line.endswith("  "):
+            return False
+        elif (not first_line.endswith(" ")) and second_line.startswith(" "):
+            return True
+        return False
+
+    tokens: List = list(tokenize.generate_tokens(file_obj.readline))
+
+    for first_token, second_token, third_token in zip(tokens, tokens[1:], tokens[2:]):
+        # Checking if we are in a block of concated string
+        if (
+            first_token.type == third_token.type == token.STRING
+            and second_token.type == token.NL
+        ):
+            # Striping the quotes, with the string literal prefix
+            first_string: str = first_token.string[
+                _get_literal_string_prefix_len(first_token.string) + 1 : -1
+            ]
+            second_string: str = third_token.string[
+                _get_literal_string_prefix_len(third_token.string) + 1 : -1
+            ]
+
+            if has_wrong_whitespace(first_string, second_string):
+                yield (
+                    third_token.start[0],
+                    (
+                        "String has a space at the beginning instead "
+                        "of the end of the previous string."
+                    ),
+                )
+
+
+def main(
+    function: Callable[[IO[str]], Iterable[Tuple[int, str]]],
+    source_path: str,
+    output_format: str,
+) -> bool:
+    """
+    Main entry point of the script.
+
+    Parameters
+    ----------
+    function : Callable
+        Function to execute for the specified validation type.
+    source_path : str
+        Source path representing path to a file/directory.
+    output_format : str
+        Output format of the error message.
+    file_extensions_to_check : str
+        Comma separated values of what file extensions to check.
+    excluded_file_paths : str
+        Comma separated values of what file paths to exclude during the check.
+
+    Returns
+    -------
+    bool
+        True if found any patterns are found related to the given function.
+
+    Raises
+    ------
+    ValueError
+        If the `source_path` is not pointing to existing file/directory.
+    """
+    is_failed: bool = False
+
+    for file_path in source_path:
+        with open(file_path, encoding="utf-8") as file_obj:
+            for line_number, msg in function(file_obj):
+                is_failed = True
+                print(
+                    output_format.format(
+                        source_path=file_path, line_number=line_number, msg=msg
+                    )
+                )
+
+    return is_failed
+
+
+if __name__ == "__main__":
+    available_validation_types: List[str] = [
+        "bare_pytest_raises",
+        "private_function_across_module",
+        "private_import_across_module",
+        "strings_to_concatenate",
+        "strings_with_wrong_placed_whitespace",
+    ]
+
+    parser = argparse.ArgumentParser(description="Unwanted patterns checker.")
+
+    parser.add_argument("paths", nargs="*", help="Source paths of files to check.")
+    parser.add_argument(
+        "--format",
+        "-f",
+        default="{source_path}:{line_number}:{msg}",
+        help="Output format of the error message.",
+    )
+    parser.add_argument(
+        "--validation-type",
+        "-vt",
+        choices=available_validation_types,
+        required=True,
+        help="Validation test case to check.",
+    )
+
+    args = parser.parse_args()
+
+    sys.exit(
+        main(
+            function=globals().get(args.validation_type),
+            source_path=args.paths,
+            output_format=args.format,
+        )
+    )

From a747171f438aa35584928f802b043f56d4f5cd7c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 31 Dec 2022 19:00:45 +0000
Subject: [PATCH 02/13] fixups

---
 .pre-commit-config.yaml                       |  24 +-
 doc/scripts/eval_performance.py               |   3 +-
 doc/source/development/code_style.rst         | 218 ------------------
 .../development/contributing_codebase.rst     |   2 +-
 doc/source/whatsnew/v1.4.0.rst                |   2 +-
 pandas/__init__.py                            |   2 +-
 pandas/_libs/lib.pyx                          |   2 +-
 pandas/compat/__init__.py                     |  11 +-
 pandas/core/arrays/string_arrow.py            |   4 +-
 pandas/core/generic.py                        |   2 +-
 pandas/core/interchange/dataframe.py          |   8 +-
 pandas/core/internals/managers.py             |   4 +-
 pandas/core/strings/base.py                   |   4 +-
 pandas/core/strings/object_array.py           |   4 +-
 pandas/core/window/ewm.py                     |   2 +-
 pandas/io/common.py                           |   6 +-
 pandas/io/json/_json.py                       |   4 +-
 pandas/io/xml.py                              |  11 +-
 pandas/tests/arrays/boolean/test_function.py  |   2 +-
 pandas/tests/io/formats/style/test_style.py   |   4 +-
 pandas/tests/io/pytables/test_round_trip.py   |   2 +-
 pandas/tests/io/test_common.py                |   4 +-
 pandas/tests/io/test_compression.py           |   2 +-
 pandas/tests/io/test_pickle.py                |   2 +-
 pandas/tests/tools/test_to_numeric.py         |   4 +-
 scripts/sync_flake8_versions.py               |  13 +-
 scripts/tests/test_sync_flake8_versions.py    |   3 -
 scripts/tests/test_use_pd_array_in_core.py    |   2 +-
 setup.cfg                                     |  16 --
 29 files changed, 57 insertions(+), 310 deletions(-)
 delete mode 100644 doc/source/development/code_style.rst

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f33353d8f6a38..0d2179bc3e0e1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -181,15 +181,10 @@ repos:
         types: [rst]
         args: [--filename=*.rst]
         additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
-    -   id: frame-or-series-union
-        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
-        entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]
-        language: pygrep
-        types: [python]
-        exclude: ^pandas/_typing\.py$
     -   id: inconsistent-namespace-usage
         name: 'Check for inconsistent use of pandas namespace'
         entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        exclude: ^pandas/core/interchange/
         language: python
         types: [python]
     -   id: no-os-remove
@@ -200,9 +195,7 @@ repos:
         files: ^pandas/tests/
         exclude: |
             (?x)^
-            pandas/tests/io/excel/test_writers\.py
-            |pandas/tests/io/pytables/common\.py
-            |pandas/tests/io/pytables/test_store\.py$
+            pandas/tests/io/pytables/test_store\.py$
     -   id: unwanted-patterns
         name: Unwanted patterns
         language: pygrep
@@ -216,12 +209,8 @@ repos:
             |\.__class__
 
             # np.bool/np.object instead of np.bool_/np.object_
-            |np\.bool[^_8]
-            |np\.object[^_8]
-
-            # imports from pandas.core.common instead of `import pandas.core.common as com`
-            |from\ pandas\.core\.common\ import
-            |from\ pandas\.core\ import\ common
+            |np\.bool[^_8`]
+            |np\.object[^_8`]
 
             # imports from collections.abc instead of `from collections import abc`
             |from\ collections\.abc\ import
@@ -331,7 +320,10 @@ repos:
         language: python
         entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
         types: [python]
-        exclude: ^(asv_bench|pandas/tests|doc)/
+        exclude: |
+            (?x)
+            ^(asv_bench|pandas/tests|doc)/
+            |scripts/validate_min_versions_in_sync\.py$
     -   id: unwanted-patterns-strings-to-concatenate
         name: Check for use of not concatenated strings
         language: python
diff --git a/doc/scripts/eval_performance.py b/doc/scripts/eval_performance.py
index 85d9ce4ad01e9..f6087e02a9330 100644
--- a/doc/scripts/eval_performance.py
+++ b/doc/scripts/eval_performance.py
@@ -6,8 +6,7 @@
 from pandas import DataFrame
 
 setup_common = """from pandas import DataFrame
-from numpy.random import randn
-df = DataFrame(randn(%d, 3), columns=list('abc'))
+df = DataFrame(np.random.randn(%d, 3), columns=list('abc'))
 %s"""
 
 setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst
deleted file mode 100644
index 19d83eb75e5bd..0000000000000
--- a/doc/source/development/code_style.rst
+++ /dev/null
@@ -1,218 +0,0 @@
-.. _code_style:
-
-{{ header }}
-
-=======================
-pandas code style guide
-=======================
-
-.. contents:: Table of contents:
-   :local:
-
-pandas follows the `PEP8 <https://www.python.org/dev/peps/pep-0008/>`_
-standard and uses `Black <https://black.readthedocs.io/en/stable/>`_
-and `Flake8 <https://flake8.pycqa.org/en/latest/>`_ to ensure a
-consistent code format throughout the project. We encourage you to use
-:ref:`pre-commit <contributing.pre-commit>` to automatically run ``black``,
-``flake8``, ``isort``, and related code checks when you make a git commit.
-
-Patterns
-========
-
-Using foo.__class__
--------------------
-
-
-pandas uses 'type(foo)' instead 'foo.__class__' as it is making the code more
-readable.
-For example:
-
-**Good:**
-
-.. code-block:: python
-
-    foo = "bar"
-    type(foo)
-
-**Bad:**
-
-.. code-block:: python
-
-    foo = "bar"
-    foo.__class__
-
-
-String formatting
-=================
-
-Concatenated strings
---------------------
-
-Using f-strings
-~~~~~~~~~~~~~~~
-
-pandas uses f-strings formatting instead of '%' and '.format()' string formatters.
-
-The convention of using f-strings on a string that is concatenated over several lines,
-is to prefix only the lines containing values which need to be interpreted.
-
-For example:
-
-**Good:**
-
-.. code-block:: python
-
-    foo = "old_function"
-    bar = "new_function"
-
-    my_warning_message = (
-        f"Warning, {foo} is deprecated, "
-        "please use the new and way better "
-        f"{bar}"
-    )
-
-**Bad:**
-
-.. code-block:: python
-
-    foo = "old_function"
-    bar = "new_function"
-
-    my_warning_message = (
-        f"Warning, {foo} is deprecated, "
-        f"please use the new and way better "
-        f"{bar}"
-    )
-
-White spaces
-~~~~~~~~~~~~
-
-Only put white space at the end of the previous line, so
-there is no whitespace at the beginning of the concatenated string.
-
-For example:
-
-**Good:**
-
-.. code-block:: python
-
-    example_string = (
-        "Some long concatenated string, "
-        "with good placement of the "
-        "whitespaces"
-    )
-
-**Bad:**
-
-.. code-block:: python
-
-    example_string = (
-        "Some long concatenated string,"
-        " with bad placement of the"
-        " whitespaces"
-    )
-
-Representation function (aka 'repr()')
---------------------------------------
-
-pandas uses 'repr()' instead of '%r' and '!r'.
-
-The use of 'repr()' will only happen when the value is not an obvious string.
-
-For example:
-
-**Good:**
-
-.. code-block:: python
-
-    value = str
-    f"Unknown received value, got: {repr(value)}"
-
-**Good:**
-
-.. code-block:: python
-
-    value = str
-    f"Unknown received type, got: '{type(value).__name__}'"
-
-
-Imports (aim for absolute)
-==========================
-
-In Python 3, absolute imports are recommended. Using absolute imports, doing something
-like ``import string`` will import the string module rather than ``string.py``
-in the same directory. As much as possible, you should try to write out
-absolute imports that show the whole import chain from top-level pandas.
-
-Explicit relative imports are also supported in Python 3 but it is not
-recommended to use them. Implicit relative imports should never be used
-and are removed in Python 3.
-
-For example:
-
-::
-
-    # preferred
-    import pandas.core.common as com
-
-    # not preferred
-    from .common import test_base
-
-    # wrong
-    from common import test_base
-
-Testing
-=======
-
-Failing tests
---------------
-
-See https://docs.pytest.org/en/latest/skipping.html for background.
-
-Do not use ``pytest.xfail``
----------------------------
-
-Do not use this method. It has the same behavior as ``pytest.skip``, namely
-it immediately stops the test and does not check if the test will fail. If
-this is the behavior you desire, use ``pytest.skip`` instead.
-
-Using ``pytest.mark.xfail``
----------------------------
-
-Use this method if a test is known to fail but the manner in which it fails
-is not meant to be captured. It is common to use this method for a test that
-exhibits buggy behavior or a non-implemented feature. If
-the failing test has flaky behavior, use the argument ``strict=False``. This
-will make it so pytest does not fail if the test happens to pass.
-
-Prefer the decorator ``@pytest.mark.xfail`` and the argument ``pytest.param``
-over usage within a test so that the test is appropriately marked during the
-collection phase of pytest. For xfailing a test that involves multiple
-parameters, a fixture, or a combination of these, it is only possible to
-xfail during the testing phase. To do so, use the ``request`` fixture:
-
-.. code-block:: python
-
-    import pytest
-
-    def test_xfail(request):
-        request.node.add_marker(pytest.mark.xfail(reason="Indicate why here"))
-
-xfail is not to be used for tests involving failure due to invalid user arguments.
-For these tests, we need to verify the correct exception type and error message
-is being raised, using ``pytest.raises`` instead.
-
-Miscellaneous
-=============
-
-Reading from a url
-------------------
-
-**Good:**
-
-.. code-block:: python
-
-    from pandas.io.common import urlopen
-
-    with urlopen("http://www.google.com") as url:
-        raw_text = url.read()
diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst
index b05f026bbbb44..449b6de36cd24 100644
--- a/doc/source/development/contributing_codebase.rst
+++ b/doc/source/development/contributing_codebase.rst
@@ -43,7 +43,7 @@ Pre-commit
 ----------
 
 Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
-like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin),
+like ``black``, ``flake8``,
 ``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_
 Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
 it is helpful to run the check yourself before submitting code. This
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 5895a06792ffb..9dbe450261e54 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -320,7 +320,7 @@ Null-values are no longer coerced to NaN-value in value_counts and mode
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``,
-``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This
+``NaT`` and other null-values to a NaN-value for ``np.object_``-dtype. This
 behavior is now consistent with ``unique``, ``isin`` and others
 (:issue:`42688`).
 
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 951cb38656d0b..048d20f0de72f 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -135,7 +135,7 @@
 )
 
 from pandas import api, arrays, errors, io, plotting, tseries
-from pandas import testing  # noqa:PDF015
+from pandas import testing
 from pandas.util._print_versions import show_versions
 
 from pandas.io.api import (
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index bc7b876cb5de8..b56cf2a23a45f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1482,7 +1482,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
         return val
 
     if values.descr.type_num != NPY_OBJECT:
-        # i.e. values.dtype != np.object
+        # i.e. values.dtype != np.object_
         # This should not be reached
         values = values.astype(object)
 
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 085a2a80ca8ec..63e7f9c61ccb1 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -14,7 +14,10 @@
 import sys
 
 from pandas._typing import F
-import pandas.compat._compressors
+from pandas.compat._compressors import (
+    LZMAFile,
+    has_lzma,
+)
 from pandas.compat._constants import (
     IS64,
     PY39,
@@ -131,7 +134,7 @@ def is_ci_environment() -> bool:
     return os.environ.get("PANDAS_CI", "0") == "1"
 
 
-def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]:
+def get_lzma_file() -> type[LZMAFile]:
     """
     Importing the `LZMAFile` class from the `lzma` module.
 
@@ -145,13 +148,13 @@ def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]:
     RuntimeError
         If the `lzma` module was not imported correctly, or didn't exist.
     """
-    if not pandas.compat._compressors.has_lzma:
+    if not has_lzma:
         raise RuntimeError(
             "lzma module not available. "
             "A Python re-install with the proper dependencies, "
             "might be required to solve this issue."
         )
-    return pandas.compat._compressors.LZMAFile
+    return LZMAFile
 
 
 __all__ = [
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 97262b1f4bb21..40d3bcfe7bf12 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from collections.abc import Callable  # noqa: PDF001
+from collections import abc
 import re
 from typing import Union
 
@@ -302,7 +302,7 @@ def _str_endswith(self, pat: str, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | Callable,
+        repl: str | abc.Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c893e9ce3d9a9..4ad0faa81eb2a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -133,11 +133,11 @@
 from pandas.core import (
     algorithms as algos,
     arraylike,
+    common,
     indexing,
     nanops,
     sample,
 )
-from pandas.core import common  # noqa: PDF018
 from pandas.core.array_algos.replace import should_use_regex
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import PandasObject
diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py
index 9139cb41e3af7..0de9b130f0aab 100644
--- a/pandas/core/interchange/dataframe.py
+++ b/pandas/core/interchange/dataframe.py
@@ -7,8 +7,10 @@
 from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
 
 if TYPE_CHECKING:
-    import pandas as pd
-    from pandas import Index
+    from pandas import (
+        DataFrame,
+        Index,
+    )
 
 
 class PandasDataFrameXchg(DataFrameXchg):
@@ -21,7 +23,7 @@ class PandasDataFrameXchg(DataFrameXchg):
     """
 
     def __init__(
-        self, df: pd.DataFrame, nan_as_null: bool = False, allow_copy: bool = True
+        self, df: DataFrame, nan_as_null: bool = False, allow_copy: bool = True
     ) -> None:
         """
         Constructor - an instance of this (private) class is returned from
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 53f347ec4d372..c70a7e29bf3ce 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -15,7 +15,7 @@
 
 import numpy as np
 
-from pandas._config.config import _global_config
+from pandas._config import config
 
 from pandas._libs import (
     algos as libalgos,
@@ -2354,7 +2354,7 @@ def _preprocess_slice_or_indexer(
         return "fancy", indexer, len(indexer)
 
 
-_mode_options = _global_config["mode"]
+_mode_options = config._global_config["mode"]
 
 
 def _using_copy_on_write():
diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
index b5618207ab9d8..6cce14de61637 100644
--- a/pandas/core/strings/base.py
+++ b/pandas/core/strings/base.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import abc
-from collections.abc import Callable  # noqa: PDF001
+import collections
 import re
 from typing import (
     TYPE_CHECKING,
@@ -69,7 +69,7 @@ def _str_endswith(self, pat, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | Callable,
+        repl: str | collections.abc.Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 2d77cd0da816f..fb2478bfbe44e 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from collections.abc import Callable  # noqa: PDF001
+from collections import abc
 import functools
 import re
 import sys
@@ -153,7 +153,7 @@ def _str_endswith(self, pat, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | Callable,
+        repl: str | abc.Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
index a6c32133803d4..c0a7b2b7cc361 100644
--- a/pandas/core/window/ewm.py
+++ b/pandas/core/window/ewm.py
@@ -26,7 +26,7 @@
 )
 from pandas.core.dtypes.missing import isna
 
-from pandas.core import common  # noqa: PDF018
+from pandas.core import common
 from pandas.core.indexers.objects import (
     BaseIndexer,
     ExponentialMovingWindowIndexer,
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 4dae46c8f39f6..338bb6f50ea5d 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -478,7 +478,7 @@ def file_path_to_url(path: str) -> str:
     return urljoin("file:", pathname2url(path))
 
 
-_extension_to_compression = {
+extension_to_compression = {
     ".tar": "tar",
     ".tar.gz": "tar",
     ".tar.bz2": "tar",
@@ -489,7 +489,7 @@ def file_path_to_url(path: str) -> str:
     ".xz": "xz",
     ".zst": "zstd",
 }
-_supported_compressions = set(_extension_to_compression.values())
+_supported_compressions = set(extension_to_compression.values())
 
 
 def get_compression_method(
@@ -565,7 +565,7 @@ def infer_compression(
             return None
 
         # Infer compression from the filename/URL extension
-        for extension, compression in _extension_to_compression.items():
+        for extension, compression in extension_to_compression.items():
             if filepath_or_buffer.lower().endswith(extension):
                 return compression
         return None
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index f2780d5fa6832..88974f3ab4afa 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -57,7 +57,7 @@
 
 from pandas.io.common import (
     IOHandles,
-    _extension_to_compression,
+    extension_to_compression,
     file_exists,
     get_handle,
     is_fsspec_url,
@@ -854,7 +854,7 @@ def _get_data_from_filepath(self, filepath_or_buffer):
         elif (
             isinstance(filepath_or_buffer, str)
             and filepath_or_buffer.lower().endswith(
-                (".json",) + tuple(f".json{c}" for c in _extension_to_compression)
+                (".json",) + tuple(f".json{c}" for c in extension_to_compression)
             )
             and not file_exists(filepath_or_buffer)
         ):
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 4f61455826286..44e550243b938 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -46,10 +46,7 @@
 if TYPE_CHECKING:
     from xml.etree.ElementTree import Element
 
-    from lxml.etree import (
-        _Element,
-        _XSLTResultTree,
-    )
+    from lxml import etree
 
     from pandas import DataFrame
 
@@ -417,7 +414,7 @@ def _validate_names(self) -> None:
 
     def _parse_doc(
         self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
-    ) -> Element | _Element:
+    ) -> Element | etree._Element:
         """
         Build tree from path_or_buffer.
 
@@ -625,7 +622,7 @@ def _validate_names(self) -> None:
 
     def _parse_doc(
         self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
-    ) -> _Element:
+    ) -> etree._Element:
         from lxml.etree import (
             XMLParser,
             fromstring,
@@ -656,7 +653,7 @@ def _parse_doc(
 
         return document
 
-    def _transform_doc(self) -> _XSLTResultTree:
+    def _transform_doc(self) -> etree._XSLTResultTree:
         """
         Transform original tree using stylesheet.
 
diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
index 8e9112b531fad..b484dc39cf23b 100644
--- a/pandas/tests/arrays/boolean/test_function.py
+++ b/pandas/tests/arrays/boolean/test_function.py
@@ -67,7 +67,7 @@ def test_ufuncs_unary(ufunc):
 
 
 def test_ufunc_numeric():
-    # np.sqrt on np.bool returns float16, which we upcast to Float32
+    # np.sqrt on np.bool_ returns float16, which we upcast to Float32
     #  bc we do not have Float16
     arr = pd.array([True, False, None], dtype="boolean")
 
diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
index 32ab0336aa93f..46fb614d96633 100644
--- a/pandas/tests/io/formats/style/test_style.py
+++ b/pandas/tests/io/formats/style/test_style.py
@@ -707,13 +707,13 @@ def test_applymap_subset_multiindex(self, slice_):
             and isinstance(slice_[-1][-1], list)
             and "C" in slice_[-1][-1]
         ):
-            ctx = pytest.raises(KeyError, match="C")  # noqa: PDF010
+            ctx = pytest.raises(KeyError, match="C")
         elif (
             isinstance(slice_[0], tuple)
             and isinstance(slice_[0][1], list)
             and 3 in slice_[0][1]
         ):
-            ctx = pytest.raises(KeyError, match="3")  # noqa: PDF010
+            ctx = pytest.raises(KeyError, match="3")
         else:
             ctx = contextlib.nullcontext()
 
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index 53be06cd491ef..fdae2d47b6389 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -357,7 +357,7 @@ def test_timeseries_preepoch(setup_path):
         _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
     except OverflowError:
         if is_platform_windows():
-            pytest.xfail("known failure on some windows platforms")
+            pytest.mark.xfail("known failure on some windows platforms")
         else:
             raise
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index f74d268690a5b..2172a4bf839fb 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -317,7 +317,7 @@ def test_read_expands_user_home_dir(
             ),
         ],
     )
-    @pytest.mark.filterwarnings(  # pytables np.object usage
+    @pytest.mark.filterwarnings(  # pytables np.object_ usage
         "ignore:`np.object` is a deprecated alias:DeprecationWarning"
     )
     def test_read_fspath_all(self, reader, module, path, datapath):
@@ -372,7 +372,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                     expected = f_path.read()
                     assert result == expected
 
-    @pytest.mark.filterwarnings(  # pytables np.object usage
+    @pytest.mark.filterwarnings(  # pytables np.object_ usage
         "ignore:`np.object` is a deprecated alias:DeprecationWarning"
     )
     def test_write_fspath_hdf5(self):
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 782753177f245..fc15ff3488ce9 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -19,7 +19,7 @@
 import pandas.io.common as icom
 
 _compression_to_extension = {
-    value: key for key, value in icom._extension_to_compression.items()
+    value: key for key, value in icom.extension_to_compression.items()
 }
 
 
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 3dafe6fe61b35..24ac1b72f1476 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -255,7 +255,7 @@ def get_random_path():
 
 class TestCompression:
 
-    _extension_to_compression = icom._extension_to_compression
+    _extension_to_compression = icom.extension_to_compression
 
     def compress_file(self, src_path, dest_path, compression):
         if compression is None:
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index a95cc41f6397d..1347f6eb50b09 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -234,9 +234,7 @@ def test_type_check(errors):
     # see gh-11776
     df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
     kwargs = {"errors": errors} if errors is not None else {}
-    error_ctx = pytest.raises(TypeError, match="1-d array")
-
-    with error_ctx:
+    with pytest.raises(TypeError, match="1-d array"):
         to_numeric(df, **kwargs)
 
 
diff --git a/scripts/sync_flake8_versions.py b/scripts/sync_flake8_versions.py
index 8852634c5d796..0d513d5937dbe 100644
--- a/scripts/sync_flake8_versions.py
+++ b/scripts/sync_flake8_versions.py
@@ -1,5 +1,5 @@
 """
-Check that the flake8 (and pandas-dev-flaker) pins are the same in:
+Check that the flake8 pins are the same in:
 
 - environment.yml
 - .pre-commit-config.yaml, in the flake8 hook
@@ -103,17 +103,13 @@ def get_revisions(
     precommit_config: YamlMapping, environment: YamlMapping
 ) -> tuple[Revisions, Revisions]:
     flake8_revisions = Revisions(name="flake8")
-    pandas_dev_flaker_revisions = Revisions(name="pandas-dev-flaker")
 
     repos = precommit_config["repos"]
     flake8_repo, flake8_hook = _get_repo_hook(repos, "flake8")
     flake8_revisions.pre_commit = Revision("flake8", "==", flake8_repo["rev"])
     flake8_additional_dependencies = []
     for dep in _process_dependencies(flake8_hook.get("additional_dependencies", [])):
-        if dep.name == "pandas-dev-flaker":
-            pandas_dev_flaker_revisions.pre_commit = dep
-        else:
-            flake8_additional_dependencies.append(dep)
+        flake8_additional_dependencies.append(dep)
 
     environment_dependencies = environment["dependencies"]
     environment_additional_dependencies = []
@@ -121,8 +117,6 @@ def get_revisions(
         if dep.name == "flake8":
             flake8_revisions.environment = dep
             environment_additional_dependencies.append(dep)
-        elif dep.name == "pandas-dev-flaker":
-            pandas_dev_flaker_revisions.environment = dep
         else:
             environment_additional_dependencies.append(dep)
 
@@ -131,8 +125,7 @@ def get_revisions(
         environment_additional_dependencies,
     )
 
-    for revisions in flake8_revisions, pandas_dev_flaker_revisions:
-        _validate_revisions(revisions)
+    _validate_revisions(flake8_revisions)
 
 
 if __name__ == "__main__":
diff --git a/scripts/tests/test_sync_flake8_versions.py b/scripts/tests/test_sync_flake8_versions.py
index 743ece34e0b56..2349a4f5d8d1c 100644
--- a/scripts/tests/test_sync_flake8_versions.py
+++ b/scripts/tests/test_sync_flake8_versions.py
@@ -87,7 +87,6 @@ def test_get_revisions_no_failure(capsys):
                     {
                         "id": "flake8",
                         "additional_dependencies": [
-                            "pandas-dev-flaker==0.4.0",
                             "flake8-bugs==1.1.1",
                         ],
                     }
@@ -101,7 +100,6 @@ def test_get_revisions_no_failure(capsys):
                         "id": "yesqa",
                         "additional_dependencies": [
                             "flake8==0.1.1",
-                            "pandas-dev-flaker==0.4.0",
                             "flake8-bugs==1.1.1",
                         ],
                     }
@@ -116,7 +114,6 @@ def test_get_revisions_no_failure(capsys):
             {
                 "pip": [
                     "git+https://github.com/pydata/pydata-sphinx-theme.git@master",
-                    "pandas-dev-flaker==0.4.0",
                 ]
             },
         ]
diff --git a/scripts/tests/test_use_pd_array_in_core.py b/scripts/tests/test_use_pd_array_in_core.py
index 9c66199a82846..8f13a6e735899 100644
--- a/scripts/tests/test_use_pd_array_in_core.py
+++ b/scripts/tests/test_use_pd_array_in_core.py
@@ -14,7 +14,7 @@ def test_inconsistent_usage(content, capsys):
     result_msg = (
         "t.py:2:0: Don't use pd.array in core, import array as pd_array instead\n"
     )
-    with pytest.raises(SystemExit):
+    with pytest.raises(SystemExit, match=None):
         use_pd_array(content, PATH)
     expected_msg, _ = capsys.readouterr()
     assert result_msg == expected_msg
diff --git a/setup.cfg b/setup.cfg
index ef84dd7f9ce85..562ae70fd73ef 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,10 +35,6 @@ ignore =
     B023
     # Functions defined inside a loop must not use variables redefined in the loop
     B301,
-    # single-letter variables
-    PDF023,
-    # "use 'pandas._testing' instead" in non-test code
-    PDF025,
     # If test must be a simple comparison against sys.platform or sys.version_info
     Y002,
     # Use "_typeshed.Self" instead of class-bound TypeVar
@@ -59,18 +55,6 @@ exclude =
     versioneer.py,
     # exclude asv benchmark environments from linting
     env
-per-file-ignores =
-    # private import across modules
-    pandas/tests/*:PDF020
-    # pytest.raises without match=
-    pandas/tests/extension/*:PDF009
-    # os.remove
-    doc/make.py:PDF008
-    # import from pandas._testing
-    pandas/testing.py:PDF014
-    # can't use fixtures in asv
-    asv_bench/*:PDF016
-
 
 [flake8-rst]
 max-line-length = 84

From 0358a9105e73b195bd781dfbc261733908bc6d7a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 31 Dec 2022 21:05:25 +0000
Subject: [PATCH 03/13] fixup?

---
 pandas/compat/__init__.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 63e7f9c61ccb1..05510c1713ef1 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -14,10 +14,7 @@
 import sys
 
 from pandas._typing import F
-from pandas.compat._compressors import (
-    LZMAFile,
-    has_lzma,
-)
+import pandas.compat
 from pandas.compat._constants import (
     IS64,
     PY39,
@@ -134,7 +131,7 @@ def is_ci_environment() -> bool:
     return os.environ.get("PANDAS_CI", "0") == "1"
 
 
-def get_lzma_file() -> type[LZMAFile]:
+def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]:
     """
     Importing the `LZMAFile` class from the `lzma` module.
 
@@ -148,13 +145,13 @@ def get_lzma_file() -> type[LZMAFile]:
     RuntimeError
         If the `lzma` module was not imported correctly, or didn't exist.
     """
-    if not has_lzma:
+    if not pandas.compat._compressors.has_lzma:
         raise RuntimeError(
             "lzma module not available. "
             "A Python re-install with the proper dependencies, "
             "might be required to solve this issue."
         )
-    return LZMAFile
+    return pandas.compat._compressors.LZMAFile
 
 
 __all__ = [

From 8036f386699a29de5e4c440c9c13ba47d3ba5fc2 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 08:10:10 +0000
Subject: [PATCH 04/13] make _compressors public

---
 .pre-commit-config.yaml                           | 4 ++--
 pandas/compat/__init__.py                         | 8 ++++----
 pandas/compat/{_compressors.py => compressors.py} | 0
 pandas/io/common.py                               | 2 +-
 pandas/tests/io/test_pickle.py                    | 2 +-
 scripts/validate_unwanted_patterns.py             | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)
 rename pandas/compat/{_compressors.py => compressors.py} (100%)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0d2179bc3e0e1..a47503de3c227 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -329,8 +329,8 @@ repos:
         language: python
         entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
         types_or: [python, cython]
-    -   id: unwanted-patterns-strings-with-wrong-placed-whitespace
-        name: Check for strings with wrong placed spaces
+    -   id: unwanted-patterns-strings-with-misplaced-whitespace
+        name: Check for strings with misplaced spaces
         language: python
         entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
         types_or: [python, cython]
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 05510c1713ef1..b59b9632913e4 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -14,7 +14,6 @@
 import sys
 
 from pandas._typing import F
-import pandas.compat
 from pandas.compat._constants import (
     IS64,
     PY39,
@@ -22,6 +21,7 @@
     PY311,
     PYPY,
 )
+import pandas.compat.compressors
 from pandas.compat.numpy import (
     is_numpy_dev,
     np_version_under1p21,
@@ -131,7 +131,7 @@ def is_ci_environment() -> bool:
     return os.environ.get("PANDAS_CI", "0") == "1"
 
 
-def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]:
+def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
     """
     Importing the `LZMAFile` class from the `lzma` module.
 
@@ -145,13 +145,13 @@ def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]:
     RuntimeError
         If the `lzma` module was not imported correctly, or didn't exist.
     """
-    if not pandas.compat._compressors.has_lzma:
+    if not pandas.compat.compressors.has_lzma:
         raise RuntimeError(
             "lzma module not available. "
             "A Python re-install with the proper dependencies, "
             "might be required to solve this issue."
         )
-    return pandas.compat._compressors.LZMAFile
+    return pandas.compat.compressors.LZMAFile
 
 
 __all__ = [
diff --git a/pandas/compat/_compressors.py b/pandas/compat/compressors.py
similarity index 100%
rename from pandas/compat/_compressors.py
rename to pandas/compat/compressors.py
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 338bb6f50ea5d..6deaf40f00c69 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -55,8 +55,8 @@
     WriteBuffer,
 )
 from pandas.compat import get_lzma_file
-from pandas.compat._compressors import BZ2File as _BZ2File
 from pandas.compat._optional import import_optional_dependency
+from pandas.compat.compressors import BZ2File as _BZ2File
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 24ac1b72f1476..07a028a19d7f9 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -37,8 +37,8 @@
     get_lzma_file,
     is_platform_little_endian,
 )
-from pandas.compat._compressors import flatten_buffer
 from pandas.compat._optional import import_optional_dependency
+from pandas.compat.compressors import flatten_buffer
 import pandas.util._test_decorators as td
 
 import pandas as pd
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
index f90fd6c3e8a3e..f78b9b8ced863 100755
--- a/scripts/validate_unwanted_patterns.py
+++ b/scripts/validate_unwanted_patterns.py
@@ -218,7 +218,7 @@ def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]
     tree = ast.parse(contents)
 
     for node in ast.walk(tree):
-        if not (isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)):
+        if not isinstance(node, (ast.Import, ast.ImportFrom)):
             continue
 
         for module in node.names:

From cca36f7492a25c2c50c6731698316ab729c54ce0 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 12:12:57 +0000
Subject: [PATCH 05/13] use typing.Callable

---
 pandas/core/strings/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
index 6cce14de61637..c96e5a1abcf86 100644
--- a/pandas/core/strings/base.py
+++ b/pandas/core/strings/base.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
 import abc
-import collections
 import re
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Literal,
 )
 
@@ -69,7 +69,7 @@ def _str_endswith(self, pat, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | collections.abc.Callable,
+        repl: str | Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,

From ba2533db09b4ecd818d2061d9d015a76ea202ca0 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 12:43:43 +0000
Subject: [PATCH 06/13] reduce diff

---
 .pre-commit-config.yaml | 38 ++++++++++++++++++++++
 ci/code_checks.sh       | 71 ++---------------------------------------
 2 files changed, 40 insertions(+), 69 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a47503de3c227..b4f8d04072c79 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -227,6 +227,15 @@ repos:
 
             # Check for deprecated messages without sphinx directive
             |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
+
+            # {foo!r} instead of {repr(foo)}
+            |!r}
+
+            # builtin filter function
+            |(?<!def)[\(\s]filter\(
+
+            # exec
+            |[^a-zA-Z0-9_]exec\(
         types_or: [python, cython, rst]
         exclude: ^doc/source/development/code_style\.rst  # contains examples of patterns to avoid
     -   id: cython-casting
@@ -280,8 +289,37 @@ repos:
 
             # pd.api.types instead of from pandas.api.types import ...
             |(pd|pandas)\.api\.types\.
+
+            # np.testing, np.array_equal
+            |(numpy|np)(\.testing|\.array_equal)
+
+            # unittest.mock (use pytest builtin monkeypatch fixture instead)
+            |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
+
+            # pytest raises without context
+            |\s\ pytest.raises
+
+            # pytest.warns (use tm.assert_produces_warning instead)
+            |pytest\.warns
         files: ^pandas/tests/
         types_or: [python, cython, rst]
+    -   id: unwanted-patterns-in-ea-tests
+        name: Unwanted patterns in EA tests
+        language: pygrep
+        entry: |
+            (?x)
+            tm.assert_(series|frame)_equal
+        files: ^pandas/tests/extension/base/
+        exclude: ^pandas/tests/extension/base/base\.py$
+        types_or: [python, cython, rst]
+    -   id: unwanted-patterns-in-cython
+        name: Unwanted patterns in Cython code
+        language: pygrep
+        entry: |
+            (?x)
+            # `<type>obj` as opposed to `<type> obj`
+            [a-zA-Z0-9*]>
+        types: [cython]
     -   id: pip-to-conda
         name: Generate pip dependency from conda
         language: python
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 228cc5518495e..3c1362b1ac83e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -8,16 +8,14 @@
 #
 # Usage:
 #   $ ./ci/code_checks.sh               # run all checks
-#   $ ./ci/code_checks.sh lint          # run linting only
-#   $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
 #   $ ./ci/code_checks.sh code          # checks on imported code
 #   $ ./ci/code_checks.sh doctests      # run doctests
 #   $ ./ci/code_checks.sh docstrings    # validate docstring errors
 #   $ ./ci/code_checks.sh single-docs   # check single-page docs build warning-free
 #   $ ./ci/code_checks.sh notebooks     # check execution of documentation notebooks
 
-[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
-    { echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
+[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
+    { echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
 
 BASE_DIR="$(dirname $0)/.."
 RET=0
@@ -40,71 +38,6 @@ if [[ "$GITHUB_ACTIONS" == "true" ]]; then
     INVGREP_PREPEND="##[error]"
 fi
 
-### LINTING ###
-if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
-
-    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
-    # it doesn't make a difference, but we want to be internally consistent.
-    # Note: this grep pattern is (intended to be) equivalent to the python
-    # regex r'(?<![ ->])> '
-    MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG
-    invgrep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # readability/casting: Warnings about C casting instead of C++ casting
-    # runtime/int: Warnings about using C number types instead of C++ ones
-    # build/include_subdir: Warnings about prefacing included header files with directory
-
-fi
-
-### PATTERNS ###
-if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
-
-    MSG='Check for use of exec' ; echo $MSG
-    invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for pytest warns' ; echo $MSG
-    invgrep -r -E --include '*.py' 'pytest\.warns' pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for pytest raises without context' ; echo $MSG
-    invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for use of builtin filter function' ; echo $MSG
-    invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Check for the following code in testing: `np.testing` and `np.array_equal`
-    MSG='Check for invalid testing' ; echo $MSG
-    invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
-    MSG='Check for invalid EA testing' ; echo $MSG
-    invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for deprecated messages without sphinx directive' ; echo $MSG
-    invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
-    invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
-    MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
-    invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
-    invgrep -R --include=*.{py,pyx} '!r}' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    echo $MSG "DONE"
-fi
-
 ### CODE ###
 if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
 

From ad4f1ebde2d6639fe6e7a7b27574a26c60172c31 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 12:46:25 +0000
Subject: [PATCH 07/13] further reduce

---
 pandas/core/strings/object_array.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index fb2478bfbe44e..508ac122d67af 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
-from collections import abc
 import functools
 import re
 import sys
 import textwrap
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Literal,
 )
 import unicodedata
@@ -153,7 +153,7 @@ def _str_endswith(self, pat, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | abc.Callable,
+        repl: str | Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,

From 098f988ea1b8a6347edc83c5645653946c2bbd10 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 12:51:20 +0000
Subject: [PATCH 08/13] one more

---
 pandas/core/arrays/string_arrow.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 40d3bcfe7bf12..fb081d0e63c96 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
-from collections import abc
 import re
-from typing import Union
+from typing import (
+    Callable,
+    Union,
+)
 
 import numpy as np
 
@@ -302,7 +304,7 @@ def _str_endswith(self, pat: str, na=None):
     def _str_replace(
         self,
         pat: str | re.Pattern,
-        repl: str | abc.Callable,
+        repl: str | Callable,
         n: int = -1,
         case: bool = True,
         flags: int = 0,

From 4826c2a2d80cbdffc9dca3d03bdfc0486c8d30bc Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 13:04:39 +0000
Subject: [PATCH 09/13] fixup regex

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b4f8d04072c79..10d6fdab5db39 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -318,7 +318,7 @@ repos:
         entry: |
             (?x)
             # `<type>obj` as opposed to `<type> obj`
-            [a-zA-Z0-9*]>
+            [a-zA-Z0-9*]>\s
         types: [cython]
     -   id: pip-to-conda
         name: Generate pip dependency from conda

From 87d460076351a366550a1e52279aea6cc92a4dbc Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 1 Jan 2023 13:07:21 +0000
Subject: [PATCH 10/13] fixup regex

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 10d6fdab5db39..dab3a98f2253b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -318,7 +318,7 @@ repos:
         entry: |
             (?x)
             # `<type>obj` as opposed to `<type> obj`
-            [a-zA-Z0-9*]>\s
+            [a-zA-Z0-9*]>[ ]
         types: [cython]
     -   id: pip-to-conda
         name: Generate pip dependency from conda

From 75626216290070ea6b6ef4d5b2a55bf5b7a8fc72 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 3 Jan 2023 18:32:23 +0000
Subject: [PATCH 11/13] add xfail to request

---
 pandas/tests/io/pytables/test_round_trip.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index fdae2d47b6389..00f0363d7fe92 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -349,7 +349,7 @@ def test_index_types(setup_path):
         _check_roundtrip(ser, func, path=setup_path)
 
 
-def test_timeseries_preepoch(setup_path):
+def test_timeseries_preepoch(setup_path, request):
 
     dr = bdate_range("1/1/1940", "1/1/1960")
     ts = Series(np.random.randn(len(dr)), index=dr)
@@ -357,7 +357,9 @@ def test_timeseries_preepoch(setup_path):
         _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
     except OverflowError:
         if is_platform_windows():
-            pytest.mark.xfail("known failure on some windows platforms")
+            request.node.add_marker(
+                pytest.mark.xfail("known failure on some windows platforms")
+            )
         else:
             raise
 

From c86353b2840e0bd24eba6a744a24354e67826f67 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 3 Jan 2023 20:04:00 +0000
Subject: [PATCH 12/13] remove else after xfail mark

---
 pandas/tests/io/pytables/test_round_trip.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index 00f0363d7fe92..5c7c4f9ce0b75 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -360,8 +360,7 @@ def test_timeseries_preepoch(setup_path, request):
             request.node.add_marker(
                 pytest.mark.xfail("known failure on some windows platforms")
             )
-        else:
-            raise
+        raise
 
 
 @pytest.mark.parametrize(

From 084efe021dc516073d01ed3545aad52e8c896e7a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 4 Jan 2023 16:54:02 +0000
Subject: [PATCH 13/13] make copy_on_write public

---
 pandas/core/generic.py            |  4 ++--
 pandas/core/internals/managers.py | 28 ++++++++++++++--------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 51f3a4eae6ade..21b2907bbf2ef 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -159,7 +159,7 @@
     SingleArrayManager,
 )
 from pandas.core.internals.construction import mgr_to_mgr
-from pandas.core.internals.managers import _using_copy_on_write
+from pandas.core.internals.managers import using_copy_on_write
 from pandas.core.missing import (
     clean_fill_method,
     clean_reindex_fill_method,
@@ -10102,7 +10102,7 @@ def truncate(
         if isinstance(ax, MultiIndex):
             setattr(result, self._get_axis_name(axis), ax.truncate(before, after))
 
-        if copy or (copy is None and not _using_copy_on_write()):
+        if copy or (copy is None and not using_copy_on_write()):
             result = result.copy(deep=copy)
 
         return result
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index c70a7e29bf3ce..c4e869a3f6a45 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -376,7 +376,7 @@ def setitem(self: T, indexer, value) -> T:
         if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
             raise ValueError(f"Cannot set values with ndim > {self.ndim}")
 
-        if _using_copy_on_write() and not self._has_no_reference(0):
+        if using_copy_on_write() and not self._has_no_reference(0):
             # if being referenced -> perform Copy-on-Write and clear the reference
             # this method is only called if there is a single block -> hardcoded 0
             self = self.copy()
@@ -385,7 +385,7 @@ def setitem(self: T, indexer, value) -> T:
 
     def putmask(self, mask, new, align: bool = True):
         if (
-            _using_copy_on_write()
+            using_copy_on_write()
             and self.refs is not None
             and not all(ref is None for ref in self.refs)
         ):
@@ -429,7 +429,7 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             limit = libalgos.validate_limit(None, limit=limit)
         if inplace:
             # TODO(CoW) can be optimized to only copy those blocks that have refs
-            if _using_copy_on_write() and any(
+            if using_copy_on_write() and any(
                 not self._has_no_reference_block(i) for i in range(len(self.blocks))
             ):
                 self = self.copy()
@@ -613,7 +613,7 @@ def copy(self: T, deep: bool | None | Literal["all"] = True) -> T:
         BlockManager
         """
         if deep is None:
-            if _using_copy_on_write():
+            if using_copy_on_write():
                 # use shallow copy
                 deep = False
             else:
@@ -701,7 +701,7 @@ def reindex_indexer(
         pandas-indexer with -1's only.
         """
         if copy is None:
-            if _using_copy_on_write():
+            if using_copy_on_write():
                 # use shallow copy
                 copy = False
             else:
@@ -879,7 +879,7 @@ def _slice_take_blocks_ax0(
                     #  we may try to only slice
                     taker = blklocs[mgr_locs.indexer]
                     max_len = max(len(mgr_locs), taker.max() + 1)
-                    if only_slice or _using_copy_on_write():
+                    if only_slice or using_copy_on_write():
                         taker = lib.maybe_indices_to_slice(taker, max_len)
 
                     if isinstance(taker, slice):
@@ -1039,7 +1039,7 @@ def from_blocks(
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
-        parent = parent if _using_copy_on_write() else None
+        parent = parent if using_copy_on_write() else None
         return cls(blocks, axes, refs, parent, verify_integrity=False)
 
     # ----------------------------------------------------------------
@@ -1222,7 +1222,7 @@ def value_getitem(placement):
             blk_locs = blklocs[val_locs.indexer]
             if inplace and blk.should_store(value):
                 # Updating inplace -> check if we need to do Copy-on-Write
-                if _using_copy_on_write() and not self._has_no_reference_block(blkno_l):
+                if using_copy_on_write() and not self._has_no_reference_block(blkno_l):
                     blk.set_inplace(blk_locs, value_getitem(val_locs), copy=True)
                     self._clear_reference_block(blkno_l)
                 else:
@@ -1320,7 +1320,7 @@ def _iset_single(
 
         if inplace and blk.should_store(value):
             copy = False
-            if _using_copy_on_write() and not self._has_no_reference_block(blkno):
+            if using_copy_on_write() and not self._has_no_reference_block(blkno):
                 # perform Copy-on-Write and clear the reference
                 copy = True
                 self._clear_reference_block(blkno)
@@ -1344,7 +1344,7 @@ def column_setitem(
         This is a method on the BlockManager level, to avoid creating an
         intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
         """
-        if _using_copy_on_write() and not self._has_no_reference(loc):
+        if using_copy_on_write() and not self._has_no_reference(loc):
             # otherwise perform Copy-on-Write and clear the reference
             blkno = self.blknos[loc]
             blocks = list(self.blocks)
@@ -1839,7 +1839,7 @@ def __init__(
         self.axes = [axis]
         self.blocks = (block,)
         self.refs = refs
-        self.parent = parent if _using_copy_on_write() else None
+        self.parent = parent if using_copy_on_write() else None
 
     @classmethod
     def from_blocks(
@@ -1876,7 +1876,7 @@ def to_2d_mgr(self, columns: Index) -> BlockManager:
         new_blk = type(blk)(arr, placement=bp, ndim=2)
         axes = [columns, self.axes[0]]
         refs: list[weakref.ref | None] = [weakref.ref(blk)]
-        parent = self if _using_copy_on_write() else None
+        parent = self if using_copy_on_write() else None
         return BlockManager(
             [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False
         )
@@ -2020,7 +2020,7 @@ def setitem_inplace(self, indexer, value) -> None:
         in place, not returning a new Manager (and Block), and thus never changing
         the dtype.
         """
-        if _using_copy_on_write() and not self._has_no_reference(0):
+        if using_copy_on_write() and not self._has_no_reference(0):
             self.blocks = (self._block.copy(),)
             self.refs = None
             self.parent = None
@@ -2357,5 +2357,5 @@ def _preprocess_slice_or_indexer(
 _mode_options = config._global_config["mode"]
 
 
-def _using_copy_on_write():
+def using_copy_on_write():
     return _mode_options["copy_on_write"]