Skip to content

Commit 3d72cf2

Browse files
Merge branch 'pandas-dev:main' into raise-on-parse-int-overflow
2 parents ba40923 + 03b4095 commit 3d72cf2

File tree

16 files changed

+148
-52
lines changed

16 files changed

+148
-52
lines changed

doc/source/conf.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,12 +653,20 @@ def linkcode_resolve(domain, info):
653653
try:
654654
fn = inspect.getsourcefile(inspect.unwrap(obj))
655655
except TypeError:
656-
fn = None
656+
try: # property
657+
fn = inspect.getsourcefile(inspect.unwrap(obj.fget))
658+
except (AttributeError, TypeError):
659+
fn = None
657660
if not fn:
658661
return None
659662

660663
try:
661664
source, lineno = inspect.getsourcelines(obj)
665+
except TypeError:
666+
try: # property
667+
source, lineno = inspect.getsourcelines(obj.fget)
668+
except (AttributeError, TypeError):
669+
lineno = None
662670
except OSError:
663671
lineno = None
664672

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ Other enhancements
293293
- :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`)
294294
- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`)
295295
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`)
296+
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
296297

297298
.. ---------------------------------------------------------------------------
298299
.. _whatsnew_150.notable_bug_fixes:

pandas/_libs/algos.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def ensure_int8(arr: object, copy=...) -> npt.NDArray[np.int8]: ...
132132
def ensure_int16(arr: object, copy=...) -> npt.NDArray[np.int16]: ...
133133
def ensure_int32(arr: object, copy=...) -> npt.NDArray[np.int32]: ...
134134
def ensure_int64(arr: object, copy=...) -> npt.NDArray[np.int64]: ...
135+
def ensure_uint64(arr: object, copy=...) -> npt.NDArray[np.uint64]: ...
135136
def take_1d_int8_int8(
136137
values: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, fill_value=...
137138
) -> None: ...

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
4141
('int16', 'INT16', 'int16'),
4242
('int32', 'INT32', 'int32'),
4343
('int64', 'INT64', 'int64'),
44+
('uint64', 'UINT64', 'uint64'),
4445
# Disabling uint and complex dtypes because we do not use them
45-
# (and compiling them increases wheel size)
46+
# (and compiling them increases wheel size), except uint64, which is enabled above
4647
# ('uint8', 'UINT8', 'uint8'),
4748
# ('uint16', 'UINT16', 'uint16'),
4849
# ('uint32', 'UINT32', 'uint32'),
49-
# ('uint64', 'UINT64', 'uint64'),
5050
# ('complex64', 'COMPLEX64', 'complex64'),
5151
# ('complex128', 'COMPLEX128', 'complex128')
5252
]

pandas/_libs/groupby.pyx

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -513,14 +513,7 @@ ctypedef fused mean_t:
513513

514514
ctypedef fused sum_t:
515515
mean_t
516-
int8_t
517-
int16_t
518-
int32_t
519516
int64_t
520-
521-
uint8_t
522-
uint16_t
523-
uint32_t
524517
uint64_t
525518
object
526519

pandas/core/dtypes/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def ensure_float(arr):
100100
ensure_int8 = algos.ensure_int8
101101
ensure_platform_int = algos.ensure_platform_int
102102
ensure_object = algos.ensure_object
103+
ensure_uint64 = algos.ensure_uint64
103104

104105

105106
def ensure_str(value: bytes | Any) -> str:

pandas/core/frame.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5824,6 +5824,7 @@ def set_index(
58245824
append: bool = ...,
58255825
inplace: Literal[False] = ...,
58265826
verify_integrity: bool = ...,
5827+
copy: bool | lib.NoDefault = ...,
58275828
) -> DataFrame:
58285829
...
58295830

@@ -5836,6 +5837,7 @@ def set_index(
58365837
append: bool = ...,
58375838
inplace: Literal[True],
58385839
verify_integrity: bool = ...,
5840+
copy: bool | lib.NoDefault = ...,
58395841
) -> None:
58405842
...
58415843

@@ -5847,6 +5849,7 @@ def set_index(
58475849
append: bool = False,
58485850
inplace: bool = False,
58495851
verify_integrity: bool = False,
5852+
copy: bool | lib.NoDefault = lib.no_default,
58505853
) -> DataFrame | None:
58515854
"""
58525855
Set the DataFrame index using existing columns.
@@ -5873,6 +5876,11 @@ def set_index(
58735876
Check the new index for duplicates. Otherwise defer the check until
58745877
necessary. Setting to False will improve the performance of this
58755878
method.
5879+
copy : bool, default True
5880+
Whether to make a copy of the underlying data when returning a new
5881+
DataFrame. Cannot be specified when ``inplace=True``.
5882+
5883+
.. versionadded:: 1.5.0
58765884
58775885
Returns
58785886
-------
@@ -5938,6 +5946,13 @@ def set_index(
59385946
4 16 10 2014 31
59395947
"""
59405948
inplace = validate_bool_kwarg(inplace, "inplace")
5949+
if inplace:
5950+
if copy is not lib.no_default:
5951+
raise ValueError("Cannot specify copy when inplace=True")
5952+
copy = False
5953+
elif copy is lib.no_default:
5954+
copy = True
5955+
59415956
self._check_inplace_and_allows_duplicate_labels(inplace)
59425957
if not isinstance(keys, list):
59435958
keys = [keys]
@@ -5973,7 +5988,7 @@ def set_index(
59735988
if inplace:
59745989
frame = self
59755990
else:
5976-
frame = self.copy()
5991+
frame = self.copy(deep=copy)
59775992

59785993
arrays = []
59795994
names: list[Hashable] = []

pandas/core/groupby/ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
ensure_float64,
4747
ensure_int64,
4848
ensure_platform_int,
49+
ensure_uint64,
4950
is_1d_only_ea_dtype,
5051
is_bool_dtype,
5152
is_complex_dtype,
@@ -224,6 +225,13 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
224225
# result may still include NaN, so we have to cast
225226
values = ensure_float64(values)
226227

228+
elif how == "sum":
229+
# Avoid overflow during group op
230+
if values.dtype.kind == "i":
231+
values = ensure_int64(values)
232+
else:
233+
values = ensure_uint64(values)
234+
227235
return values
228236

229237
# TODO: general case implementation overridable by EAs.

pandas/core/reshape/merge.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from pandas.util._decorators import (
3636
Appender,
3737
Substitution,
38+
cache_readonly,
3839
)
3940
from pandas.util._exceptions import find_stack_level
4041

@@ -651,16 +652,6 @@ def __init__(
651652

652653
self.indicator = indicator
653654

654-
self.indicator_name: str | None
655-
if isinstance(self.indicator, str):
656-
self.indicator_name = self.indicator
657-
elif isinstance(self.indicator, bool):
658-
self.indicator_name = "_merge" if self.indicator else None
659-
else:
660-
raise ValueError(
661-
"indicator option can only accept boolean or string arguments"
662-
)
663-
664655
if not is_bool(left_index):
665656
raise ValueError(
666657
f"left_index parameter must be of type bool, not {type(left_index)}"
@@ -753,6 +744,17 @@ def _maybe_drop_cross_column(
753744
if cross_col is not None:
754745
del result[cross_col]
755746

747+
@cache_readonly
748+
def _indicator_name(self) -> str | None:
749+
if isinstance(self.indicator, str):
750+
return self.indicator
751+
elif isinstance(self.indicator, bool):
752+
return "_merge" if self.indicator else None
753+
else:
754+
raise ValueError(
755+
"indicator option can only accept boolean or string arguments"
756+
)
757+
756758
def _indicator_pre_merge(
757759
self, left: DataFrame, right: DataFrame
758760
) -> tuple[DataFrame, DataFrame]:
@@ -765,7 +767,7 @@ def _indicator_pre_merge(
765767
"Cannot use `indicator=True` option when "
766768
f"data contains a column named {i}"
767769
)
768-
if self.indicator_name in columns:
770+
if self._indicator_name in columns:
769771
raise ValueError(
770772
"Cannot use name of an existing column for indicator column"
771773
)
@@ -786,13 +788,13 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
786788
result["_left_indicator"] = result["_left_indicator"].fillna(0)
787789
result["_right_indicator"] = result["_right_indicator"].fillna(0)
788790

789-
result[self.indicator_name] = Categorical(
791+
result[self._indicator_name] = Categorical(
790792
(result["_left_indicator"] + result["_right_indicator"]),
791793
categories=[1, 2, 3],
792794
)
793-
result[self.indicator_name] = result[self.indicator_name].cat.rename_categories(
794-
["left_only", "right_only", "both"]
795-
)
795+
result[self._indicator_name] = result[
796+
self._indicator_name
797+
].cat.rename_categories(["left_only", "right_only", "both"])
796798

797799
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
798800
return result

pandas/core/series.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4862,17 +4862,23 @@ def rename(
48624862
48634863
Parameters
48644864
----------
4865-
axis : {0 or 'index'}
4866-
Unused. Parameter needed for compatibility with DataFrame.
4867-
index : scalar, hashable sequence, dict-like or function, optional
4865+
index : scalar, hashable sequence, dict-like or function, optional
48684866
Functions or dict-like are transformations to apply to
48694867
the index.
48704868
Scalar or hashable sequence-like will alter the ``Series.name``
48714869
attribute.
4872-
4873-
**kwargs
4874-
Additional keyword arguments passed to the function. Only the
4875-
"inplace" keyword is used.
4870+
axis : {0 or 'index'}
4871+
Unused. Parameter needed for compatibility with DataFrame.
4872+
copy : bool, default True
4873+
Also copy underlying data.
4874+
inplace : bool, default False
4875+
Whether to modify the Series in place instead of returning a new one. If True, the value of copy is ignored.
4876+
level : int or level name, default None
4877+
In case of MultiIndex, only rename labels in the specified level.
4878+
errors : {'ignore', 'raise'}, default 'ignore'
4879+
If 'raise', raise `KeyError` when a `dict-like mapper` or
4880+
`index` contains labels that are not present in the index being transformed.
4881+
If 'ignore', existing keys will be renamed and extra keys will be ignored.
48764882
48774883
Returns
48784884
-------

0 commit comments

Comments
 (0)