Skip to content

Commit 1fc89b7

Browse files
committed
Merge branch 'master' into categorical_map_na_Action
2 parents b599c6f + 8c7b8a4 commit 1fc89b7

File tree

135 files changed

+1201
-946
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+1201
-946
lines changed

.github/workflows/dependabot.yml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: github-actions
4+
directory: /
5+
schedule:
6+
interval: weekly
7+
labels:
8+
- "CI"
9+
- "Dependencies"

.pre-commit-config.yaml

+3-11
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ repos:
2828
types_or: [python, pyi]
2929
additional_dependencies: [black==23.1.0]
3030
- repo: https://github.com/charliermarsh/ruff-pre-commit
31-
rev: v0.0.255
31+
rev: v0.0.259
3232
hooks:
3333
- id: ruff
3434
args: [--exit-non-zero-on-fix]
@@ -392,14 +392,6 @@ repos:
392392
files: ^pandas/
393393
exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
394394
types: [python]
395-
- id: flake8-pyi
396-
name: flake8-pyi
397-
entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
398-
types: [pyi]
399-
language: python
400-
additional_dependencies:
401-
- flake8==5.0.4
402-
- flake8-pyi==22.8.1
403395
- id: future-annotations
404396
name: import annotations from __future__
405397
entry: 'from __future__ import annotations'
@@ -421,8 +413,8 @@ repos:
421413
language: python
422414
stages: [manual]
423415
additional_dependencies:
424-
- autotyping==22.9.0
425-
- libcst==0.4.7
416+
- autotyping==23.3.0
417+
- libcst==0.4.9
426418
- id: check-test-naming
427419
name: check that test names start with 'test'
428420
entry: python -m scripts.check_test_naming

asv_bench/benchmarks/arithmetic.py

+4
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,14 @@ def setup(self, tz):
266266
self.ts = self.s[halfway]
267267

268268
self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))
269+
self.ts_different_reso = Timestamp("2001-01-02", tz=tz)
269270

270271
def time_series_timestamp_compare(self, tz):
271272
self.s <= self.ts
272273

274+
def time_series_timestamp_different_reso_compare(self, tz):
275+
self.s <= self.ts_different_reso
276+
273277
def time_timestamp_series_compare(self, tz):
274278
self.ts >= self.s
275279

asv_bench/benchmarks/strings.py

-7
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def setup(self, dtype):
3434

3535
# GH37371. Testing construction of string series/frames from ExtensionArrays
3636
self.series_cat_arr = Categorical(self.series_arr)
37-
self.frame_cat_arr = Categorical(self.frame_arr)
3837

3938
def time_series_construction(self, dtype):
4039
Series(self.series_arr, dtype=dtype)
@@ -54,12 +53,6 @@ def time_cat_series_construction(self, dtype):
5453
def peakmem_cat_series_construction(self, dtype):
5554
Series(self.series_cat_arr, dtype=dtype)
5655

57-
def time_cat_frame_construction(self, dtype):
58-
DataFrame(self.frame_cat_arr, dtype=dtype)
59-
60-
def peakmem_cat_frame_construction(self, dtype):
61-
DataFrame(self.frame_cat_arr, dtype=dtype)
62-
6356

6457
class Methods(Dtypes):
6558
def time_center(self, dtype):

ci/code_checks.sh

-3
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8686
MSG='Partially validate docstrings (EX01)' ; echo $MSG
8787
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
8888
pandas.Series.index \
89-
pandas.Series.hasnans \
90-
pandas.Series.to_list \
9189
pandas.Series.__iter__ \
9290
pandas.Series.keys \
9391
pandas.Series.item \
@@ -309,7 +307,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
309307
pandas_object \
310308
pandas.api.interchange.from_dataframe \
311309
pandas.Index.values \
312-
pandas.Index.hasnans \
313310
pandas.Index.dtype \
314311
pandas.Index.inferred_type \
315312
pandas.Index.shape \
5.17 KB
Loading

doc/source/getting_started/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ Data sets do not only contain numerical data. pandas provides a wide range of fu
533533
Coming from...
534534
--------------
535535

536-
Are you familiar with other software for manipulating tablular data? Learn
536+
Are you familiar with other software for manipulating tabular data? Learn
537537
the pandas-equivalent operations compared to software you already know:
538538

539539
.. panels::

doc/source/getting_started/tutorials.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ Various tutorials
113113
* `Wes McKinney's (pandas BDFL) blog <https://wesmckinney.com/archives.html>`_
114114
* `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson <http://www.randalolson.com/2012/08/06/statistical-analysis-made-easy-in-python/>`_
115115
* `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 <https://conference.scipy.org/scipy2013/tutorial_detail.php?id=109>`_
116-
* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.ipython.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
116+
* `Financial analysis in Python, by Thomas Wiecki <https://nbviewer.org/github/twiecki/financial-analysis-python-tutorial/blob/master/1.%20Pandas%20Basics.ipynb>`_
117117
* `Intro to pandas data structures, by Greg Reda <http://www.gregreda.com/2013/10/26/intro-to-pandas-data-structures/>`_
118118
* `Pandas and Python: Top 10, by Manish Amde <https://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/>`_
119119
* `Pandas DataFrames Tutorial, by Karlijn Willems <https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python>`_

doc/source/reference/arrays.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,10 @@ PyArrow type pandas extension type NumPy
9393

9494
.. note::
9595

96-
For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated
97-
by :class:`arrays.ArrowStringArray` and ``StringDtype("pyarrow")``. See the :ref:`string section <api.arrays.string>`
98-
below.
96+
Pyarrow-backed string support is provided by both ``pd.StringDtype("pyarrow")`` and ``pd.ArrowDtype(pa.string())``.
97+
``pd.StringDtype("pyarrow")`` is described below in the :ref:`string section <api.arrays.string>`
98+
and will be returned if the string alias ``"string[pyarrow]"`` is specified. ``pd.ArrowDtype(pa.string())``
99+
generally has better interoperability with :class:`ArrowDtype` of different types.
99100

100101
While individual values in an :class:`arrays.ArrowExtensionArray` are stored as PyArrow objects, scalars are **returned**
101102
as Python scalars corresponding to the data type, e.g. a PyArrow int64 will be returned as Python int, or :class:`NA` for missing

doc/source/user_guide/advanced.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ As usual, **both sides** of the slicers are included as this is label indexing.
322322
.. warning::
323323

324324
You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
325-
for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted
325+
for the **columns**. There are some ambiguous cases where the passed indexer could be misinterpreted
326326
  as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows.
327327

328328
You should do this:

doc/source/user_guide/groupby.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ the columns except the one we specify:
149149
grouped.sum()
150150
151151
The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
152-
a tranpose:
152+
a transpose:
153153

154154
.. ipython::
155155

doc/source/user_guide/pyarrow.rst

+19-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,23 @@ which is similar to a NumPy array. To construct these from the main pandas data
3535
df = pd.DataFrame([[1, 2], [3, 4]], dtype="uint64[pyarrow]")
3636
df
3737
38+
.. note::
39+
40+
The string alias ``"string[pyarrow]"`` maps to ``pd.StringDtype("pyarrow")`` which is not equivalent to
41+
specifying ``dtype=pd.ArrowDtype(pa.string())``. Generally, operations on the data will behave similarly
42+
except ``pd.StringDtype("pyarrow")`` can return NumPy-backed nullable types while ``pd.ArrowDtype(pa.string())``
43+
will return :class:`ArrowDtype`.
44+
45+
.. ipython:: python
46+
47+
import pyarrow as pa
48+
data = list("abc")
49+
ser_sd = pd.Series(data, dtype="string[pyarrow]")
50+
ser_ad = pd.Series(data, dtype=pd.ArrowDtype(pa.string()))
51+
ser_ad.dtype == ser_sd.dtype
52+
ser_sd.str.contains("a")
53+
ser_ad.str.contains("a")
54+
3855
For PyArrow types that accept parameters, you can pass in a PyArrow type with those parameters
3956
into :class:`ArrowDtype` to use in the ``dtype`` parameter.
4057

@@ -106,6 +123,7 @@ The following are just some examples of operations that are accelerated by nativ
106123

107124
.. ipython:: python
108125
126+
import pyarrow as pa
109127
ser = pd.Series([-1.545, 0.211, None], dtype="float32[pyarrow]")
110128
ser.mean()
111129
ser + ser
@@ -115,7 +133,7 @@ The following are just some examples of operations that are accelerated by nativ
115133
ser.isna()
116134
ser.fillna(0)
117135
118-
ser_str = pd.Series(["a", "b", None], dtype="string[pyarrow]")
136+
ser_str = pd.Series(["a", "b", None], dtype=pd.ArrowDtype(pa.string()))
119137
ser_str.str.startswith("a")
120138
121139
from datetime import datetime

doc/source/user_guide/reshaping.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Reshaping by pivoting DataFrame objects
1313

1414
.. image:: ../_static/reshaping_pivot.png
1515

16-
Data is often stored in so-called "stacked" or "record" format:
16+
Data is often stored in so-called "stacked" or "record" format. In a "record" or "wide" format, there is typically one row for each subject. In the "stacked" or "long" format, there are multiple rows for each subject where applicable.
1717

1818
.. ipython:: python
1919

doc/source/user_guide/timeseries.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -507,14 +507,18 @@ used if a custom frequency string is passed.
507507
Timestamp limitations
508508
---------------------
509509

510-
Since pandas represents timestamps in nanosecond resolution, the time span that
510+
The limits of timestamp representation depend on the chosen resolution. For
511+
nanosecond resolution, the time span that
511512
can be represented using a 64-bit integer is limited to approximately 584 years:
512513

513514
.. ipython:: python
514515
515516
pd.Timestamp.min
516517
pd.Timestamp.max
517518
519+
When choosing second-resolution, the available range grows to ``+/- 2.9e11 years``.
520+
Different resolutions can be converted to each other through ``as_unit``.
521+
518522
.. seealso::
519523

520524
:ref:`timeseries.oob`

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,7 @@ Timedelta
11901190
- Bug in :func:`to_timedelta` raising error when input has nullable dtype ``Float64`` (:issue:`48796`)
11911191
- Bug in :class:`Timedelta` constructor incorrectly raising instead of returning ``NaT`` when given a ``np.timedelta64("nat")`` (:issue:`48898`)
11921192
- Bug in :class:`Timedelta` constructor failing to raise when passed both a :class:`Timedelta` object and keywords (e.g. days, seconds) (:issue:`48898`)
1193+
- Bug in :class:`Timedelta` comparisons with very large ``datetime.timedelta`` objects incorrectly raising ``OutOfBoundsTimedelta`` (:issue:`49021`)
11931194

11941195
Timezones
11951196
^^^^^^^^^

doc/source/whatsnew/v2.1.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Other enhancements
3939
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
4040
- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
4141
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
42+
- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
4243
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
4344
- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`)
4445

@@ -119,6 +120,7 @@ Deprecations
119120
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
120121
- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
121122
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`.DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
123+
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
122124
-
123125

124126
.. ---------------------------------------------------------------------------
@@ -205,13 +207,14 @@ Missing
205207

206208
MultiIndex
207209
^^^^^^^^^^
208-
-
210+
- Bug in :meth:`MultiIndex.set_levels` not preserving dtypes for :class:`Categorical` (:issue:`52125`)
209211
-
210212

211213
I/O
212214
^^^
213215
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
214216
- :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
217+
- Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
215218
-
216219

217220
Period

pandas/_config/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,7 @@ def pp(name: str, ks: Iterable[str]) -> list[str]:
737737

738738

739739
@contextmanager
740-
def config_prefix(prefix) -> Generator[None, None, None]:
740+
def config_prefix(prefix: str) -> Generator[None, None, None]:
741741
"""
742742
contextmanager for multiple invocations of API with a common prefix
743743

pandas/_libs/lib.pyx

+6-2
Original file line numberDiff line numberDiff line change
@@ -2326,10 +2326,14 @@ def maybe_convert_numeric(
23262326
if not seen.coerce_numeric:
23272327
raise type(err)(f"{err} at position {i}")
23282328

2329-
seen.saw_null()
2330-
floats[i] = NaN
23312329
mask[i] = 1
23322330

2331+
if allow_null_in_int:
2332+
seen.null_ = True
2333+
else:
2334+
seen.saw_null()
2335+
floats[i] = NaN
2336+
23332337
if seen.check_uint64_conflict():
23342338
return (values, None)
23352339

0 commit comments

Comments
 (0)