pandas-dev
diff --git a/‎.github/workflows/32-bit-linux.yml
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/32-bit-linux.yml
Lines changed: 2 additions & 1 deletion
diff --git a/‎.github/workflows/python-dev.yml
Lines changed: 29 additions & 17 deletions b/‎.github/workflows/python-dev.yml
Lines changed: 29 additions & 17 deletions
diff --git a/‎.github/workflows/ubuntu.yml
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/ubuntu.yml
Lines changed: 6 additions & 1 deletion
diff --git a/‎Dockerfile
Lines changed: 1 addition & 2 deletions b/‎Dockerfile
Lines changed: 1 addition & 2 deletions
diff --git a/‎asv_bench/benchmarks/hash_functions.py
Lines changed: 15 additions & 0 deletions b/‎asv_bench/benchmarks/hash_functions.py
Lines changed: 15 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 3 deletions b/‎asv_bench/benchmarks/reshape.py
Lines changed: 1 addition & 3 deletions
diff --git a/‎asv_bench/benchmarks/series_methods.py
Lines changed: 10 additions & 0 deletions b/‎asv_bench/benchmarks/series_methods.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎doc/redirects.csv
Lines changed: 2 additions & 3 deletions b/‎doc/redirects.csv
Lines changed: 2 additions & 3 deletions
diff --git a/‎doc/source/development/contributing_codebase.rst
Lines changed: 6 additions & 1 deletion b/‎doc/source/development/contributing_codebase.rst
Lines changed: 6 additions & 1 deletion
diff --git a/‎doc/source/reference/arrays.rst
Lines changed: 53 additions & 16 deletions b/‎doc/source/reference/arrays.rst
Lines changed: 53 additions & 16 deletions
@@ -39,8 +39,9 @@ jobs:
           . ~/virtualenvs/pandas-dev/bin/activate && \
           python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
           pip install cython numpy python-dateutil pytz pytest pytest-xdist 'pytest-asyncio>=0.17' hypothesis && \
-          python setup.py build_ext -q -j2 && \
+          python setup.py build_ext -q -j1 && \
           python -m pip install --no-build-isolation --no-use-pep517 -e . && \
+          python -m pip list && \
           export PANDAS_CI=1 && \
           pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml"
 
 
@@ -1,9 +1,21 @@
-# This file is purposely frozen(does not run). DO NOT DELETE IT
-# Unfreeze(by commentingthe if: false() condition) once the
-# next Python Dev version has released beta 1 and both Cython and numpy support it
-# After that Python has released, migrate the workflows to the
-# posix GHA workflows and "freeze" this file by
-# uncommenting the if: false() condition
+# This workflow may or may not run depending on the state of the next
+# unreleased Python version. DO NOT DELETE IT.
+#
+# In general, this file will remain frozen (present, but not running) until:
+#    - The next unreleased Python version has released beta 1
+#      - This version should be available on GitHub Actions.
+#    - Our required build/runtime dependencies (numpy, pytz, Cython, python-dateutil)
+#      support that unreleased Python version.
+#    To unfreeze, comment out the ``if: false`` condition, and make sure you update
+#    the name of the workflow and Python version in actions/setup-python to: '3.12-dev'
+#
+# After it has been unfrozen, this file should remain unfrozen (present, and running) until:
+#    - The next Python version has been officially released.
+#    OR
+#    - Most/All of our optional dependencies support Python 3.11 AND
+#    - The next Python version has released an rc (we are guaranteed a stable ABI).
+#    To freeze this file, uncomment the ``if: false`` condition, and migrate the jobs
+#    to the corresponding posix/windows-macos/sdist etc. workflows.
 # Feel free to modify this comment as necessary.
 
 name: Python Dev
@@ -32,7 +44,7 @@ permissions:
 
 jobs:
   build:
-    if: false # Comment this line out to "unfreeze"
+    # if: false # Uncomment this to freeze the workflow, comment it to unfreeze
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -53,27 +65,27 @@ jobs:
         fetch-depth: 0
 
     - name: Set up Python Dev Version
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v4
       with:
         python-version: '3.11-dev'
 
     - name: Install dependencies
-      shell: bash -el {0}
       run: |
-        python3 -m pip install --upgrade pip setuptools wheel
-        python3 -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
-        python3 -m pip install git+https://github.com/nedbat/coveragepy.git
-        python3 -m pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
-        python3 -m pip list
+        python --version
+        python -m pip install --upgrade pip setuptools wheel
+        python -m pip install git+https://github.com/numpy/numpy.git
+        python -m pip install git+https://github.com/nedbat/coveragepy.git
+        python -m pip install python-dateutil pytz cython hypothesis==6.52.1 "pytest>=6.2.5" pytest-xdist pytest-cov "pytest-asyncio>=0.17"
+        python -m pip list
 
     - name: Build Pandas
       run: |
-        python3 setup.py build_ext -q -j2
-        python3 -m pip install -e . --no-build-isolation --no-use-pep517
+        python setup.py build_ext -q -j2
+        python -m pip install -e . --no-build-isolation --no-use-pep517
 
     - name: Build Version
       run: |
-        python3 -c "import pandas; pandas.show_versions();"
+        python -c "import pandas; pandas.show_versions();"
 
     - name: Test
       uses: ./.github/actions/run-tests
@@ -52,6 +52,10 @@ jobs:
             extra_apt: "language-pack-zh-hans"
             lang: "zh_CN.utf8"
             lc_all: "zh_CN.utf8"
+          - name: "Copy-on-Write"
+            env_file: actions-310.yaml
+            pattern: "not slow and not network and not single_cpu"
+            pandas_copy_on_write: "1"
           - name: "Data Manager"
             env_file: actions-38.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -64,7 +68,7 @@ jobs:
             env_file: actions-310-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_testing_mode: "deprecate"
-            test_args: "-W error::DeprecationWarning:numpy"
+            test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
         exclude:
           - env_file: actions-39.yaml
             pyarrow_version: "6"
@@ -84,6 +88,7 @@ jobs:
       LC_ALL: ${{ matrix.lc_all || '' }}
       PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }}
       PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
+      PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
       TEST_ARGS: ${{ matrix.test_args || '' }}
       PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
 
@@ -1,4 +1,4 @@
-FROM quay.io/condaforge/miniforge3
+FROM quay.io/condaforge/mambaforge
 
 # if you forked pandas, you can pass in your own GitHub username to use your fork
 # i.e. gh_username=myname
@@ -40,7 +40,6 @@ RUN mkdir "$pandas_home" \
 # we just update the base/root one from the 'environment.yml' file instead of creating a new one.
 #
 # Set up environment
-RUN conda install -y mamba
 RUN mamba env update -n base -f "$pandas_home/environment.yml"
 
 # Build C extensions and pandas
 
@@ -39,6 +39,21 @@ def time_unique(self, exponent):
         pd.unique(self.a2)
 
 
+class Unique:
+    params = ["Int64", "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        self.ser = pd.Series(([1, pd.NA, 2] + list(range(100_000))) * 3, dtype=dtype)
+        self.ser_unique = pd.Series(list(range(300_000)) + [pd.NA], dtype=dtype)
+
+    def time_unique_with_duplicates(self, dtype):
+        pd.unique(self.ser)
+
+    def time_unique(self, dtype):
+        pd.unique(self.ser_unique)
+
+
 class NumericSeriesIndexing:
 
     params = [
 
@@ -268,9 +268,7 @@ def setup(self, bins):
         self.datetime_series = pd.Series(
             np.random.randint(N, size=N), dtype="datetime64[ns]"
         )
-        self.interval_bins = pd.IntervalIndex.from_breaks(
-            np.linspace(0, N, bins), "right"
-        )
+        self.interval_bins = pd.IntervalIndex.from_breaks(np.linspace(0, N, bins))
 
     def time_cut_int(self, bins):
         pd.cut(self.int_series, bins)
 
@@ -144,6 +144,16 @@ def time_clip(self, n):
         self.s.clip(0, 1)
 
 
+class ClipDt:
+    def setup(self):
+        dr = date_range("20220101", periods=100_000, freq="s", tz="UTC")
+        self.clipper_dt = dr[0:1_000].repeat(100)
+        self.s = Series(dr)
+
+    def time_clip(self):
+        self.s.clip(upper=self.clipper_dt)
+
+
 class ValueCounts:
 
     params = [[10**3, 10**4, 10**5], ["int", "uint", "float", "object"]]
 
@@ -741,11 +741,11 @@ generated/pandas.Index.values,../reference/api/pandas.Index.values
 generated/pandas.Index.view,../reference/api/pandas.Index.view
 generated/pandas.Index.where,../reference/api/pandas.Index.where
 generated/pandas.infer_freq,../reference/api/pandas.infer_freq
-generated/pandas.Interval.inclusive,../reference/api/pandas.Interval.inclusive
+generated/pandas.Interval.closed,../reference/api/pandas.Interval.closed
 generated/pandas.Interval.closed_left,../reference/api/pandas.Interval.closed_left
 generated/pandas.Interval.closed_right,../reference/api/pandas.Interval.closed_right
 generated/pandas.Interval,../reference/api/pandas.Interval
-generated/pandas.IntervalIndex.inclusive,../reference/api/pandas.IntervalIndex.inclusive
+generated/pandas.IntervalIndex.closed,../reference/api/pandas.IntervalIndex.closed
 generated/pandas.IntervalIndex.contains,../reference/api/pandas.IntervalIndex.contains
 generated/pandas.IntervalIndex.from_arrays,../reference/api/pandas.IntervalIndex.from_arrays
 generated/pandas.IntervalIndex.from_breaks,../reference/api/pandas.IntervalIndex.from_breaks
@@ -761,7 +761,6 @@ generated/pandas.IntervalIndex.mid,../reference/api/pandas.IntervalIndex.mid
 generated/pandas.IntervalIndex.overlaps,../reference/api/pandas.IntervalIndex.overlaps
 generated/pandas.IntervalIndex.right,../reference/api/pandas.IntervalIndex.right
 generated/pandas.IntervalIndex.set_closed,../reference/api/pandas.IntervalIndex.set_closed
-generated/pandas.IntervalIndex.set_inclusive,../reference/api/pandas.IntervalIndex.set_inclusive
 generated/pandas.IntervalIndex.to_tuples,../reference/api/pandas.IntervalIndex.to_tuples
 generated/pandas.IntervalIndex.values,../reference/api/pandas.IntervalIndex.values
 generated/pandas.Interval.left,../reference/api/pandas.Interval.left
 
@@ -122,6 +122,7 @@ Otherwise, you need to do it manually:
 .. code-block:: python
 
+    import inspect
     import warnings
+    from pandas.util._exceptions import find_stack_level
 
 
     def old_func():
@@ -130,7 +131,11 @@ Otherwise, you need to do it manually:
         .. deprecated:: 1.1.0
            Use new_func instead.
         """
-        warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2)
+        warnings.warn(
+            'Use new_func instead.',
+            FutureWarning,
+            stacklevel=find_stack_level(inspect.currentframe()),
+        )
         new_func()
 
 
 
@@ -19,19 +19,20 @@ objects contained with a :class:`Index`, :class:`Series`, or
 For some data types, pandas extends NumPy's type system. String aliases for these types
 can be found at :ref:`basics.dtypes`.
 
-=================== ========================= ================== =============================
-Kind of Data        pandas Data Type          Scalar             Array
-=================== ========================= ================== =============================
-TZ-aware datetime   :class:`DatetimeTZDtype`  :class:`Timestamp` :ref:`api.arrays.datetime`
-Timedeltas          (none)                    :class:`Timedelta` :ref:`api.arrays.timedelta`
-Period (time spans) :class:`PeriodDtype`      :class:`Period`    :ref:`api.arrays.period`
-Intervals           :class:`IntervalDtype`    :class:`Interval`  :ref:`api.arrays.interval`
-Nullable Integer    :class:`Int64Dtype`, ...  (none)             :ref:`api.arrays.integer_na`
-Categorical         :class:`CategoricalDtype` (none)             :ref:`api.arrays.categorical`
-Sparse              :class:`SparseDtype`      (none)             :ref:`api.arrays.sparse`
-Strings             :class:`StringDtype`      :class:`str`       :ref:`api.arrays.string`
-Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`      :ref:`api.arrays.bool`
-=================== ========================= ================== =============================
+=================== ========================= ============================= =============================
+Kind of Data        pandas Data Type          Scalar                        Array
+=================== ========================= ============================= =============================
+TZ-aware datetime   :class:`DatetimeTZDtype`  :class:`Timestamp`            :ref:`api.arrays.datetime`
+Timedeltas          (none)                    :class:`Timedelta`            :ref:`api.arrays.timedelta`
+Period (time spans) :class:`PeriodDtype`      :class:`Period`               :ref:`api.arrays.period`
+Intervals           :class:`IntervalDtype`    :class:`Interval`             :ref:`api.arrays.interval`
+Nullable Integer    :class:`Int64Dtype`, ...  (none)                        :ref:`api.arrays.integer_na`
+Categorical         :class:`CategoricalDtype` (none)                        :ref:`api.arrays.categorical`
+Sparse              :class:`SparseDtype`      (none)                        :ref:`api.arrays.sparse`
+Strings             :class:`StringDtype`      :class:`str`                  :ref:`api.arrays.string`
+Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`                 :ref:`api.arrays.bool`
+PyArrow             :class:`ArrowDtype`       Python Scalars or :class:`NA` :ref:`api.arrays.arrow`
+=================== ========================= ============================= =============================
 
 pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
 The top-level :meth:`array` method can be used to create a new array, which may be
@@ -42,6 +43,44 @@ stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFra
 
    array
 
+.. _api.arrays.arrow:
+
+PyArrow
+-------
+
+.. warning::
+
+    This feature is experimental, and the API can change in a future release without warning.
+
+The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a
+:external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray`
+is an :class:`ArrowDtype`.
+
+`Pyarrow <https://arrow.apache.org/docs/python/index.html>`__ provides similar array and `data type <https://arrow.apache.org/docs/python/api/datatypes.html>`__
+support as NumPy including first-class nullability support for all data types, immutability and more.
+
+.. note::
+
+    For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
+    by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
+    below.
+
+While individual values in an :class:`arrays.ArrowExtensionArray` are stored as PyArrow objects, scalars are **returned**
+as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing
+values.
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   arrays.ArrowExtensionArray
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   ArrowDtype
+
 .. _api.arrays.datetime:
 
 Datetimes
@@ -303,7 +342,6 @@ Properties
 .. autosummary::
    :toctree: api/
 
-   Interval.inclusive
    Interval.closed
    Interval.closed_left
    Interval.closed_right
@@ -341,7 +379,7 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`.
 
       arrays.IntervalArray.left
       arrays.IntervalArray.right
-      arrays.IntervalArray.inclusive
+      arrays.IntervalArray.closed
       arrays.IntervalArray.mid
       arrays.IntervalArray.length
       arrays.IntervalArray.is_empty
@@ -352,7 +390,6 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`.
       arrays.IntervalArray.contains
       arrays.IntervalArray.overlaps
       arrays.IntervalArray.set_closed
-      arrays.IntervalArray.set_inclusive
       arrays.IntervalArray.to_tuples