Skip to content

API: Series(floaty, dtype=inty) #49609

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 15, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ Other API changes
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`)
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data containing non-integer values: this now raises ``ValueError`` instead of silently retaining the float dtype. Use ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`)
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`)
Expand Down
55 changes: 3 additions & 52 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
DtypeObj,
T,
)
from pandas.errors import IntCastingNaNError

from pandas.core.dtypes.base import (
ExtensionDtype,
Expand All @@ -46,7 +45,6 @@
is_datetime64_ns_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
Expand Down Expand Up @@ -503,7 +501,6 @@ def sanitize_array(
copy: bool = False,
*,
allow_2d: bool = False,
strict_ints: bool = False,
) -> ArrayLike:
"""
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
Expand All @@ -517,8 +514,6 @@ def sanitize_array(
copy : bool, default False
allow_2d : bool, default False
If False, raise if we have a 2D Arraylike.
strict_ints : bool, default False
If False, silently ignore failures to cast float data to int dtype.

Returns
-------
Expand Down Expand Up @@ -571,32 +566,7 @@ def sanitize_array(
if isinstance(data, np.matrix):
data = data.A

if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
# possibility of nan -> garbage
try:
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
# casting aligning with IntCastingNaNError below
with np.errstate(invalid="ignore"):
# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
subarr = maybe_cast_to_integer_array(data, dtype)

except IntCastingNaNError:
raise
except ValueError:
# Pre-2.0, we would have different behavior for Series vs DataFrame.
# DataFrame would call np.array(data, dtype=dtype, copy=copy),
# which would cast to the integer dtype even if the cast is lossy.
# See GH#40110.
if strict_ints:
raise

# We ignore the dtype arg and return floating values,
# e.g. test_constructor_floating_data_int_dtype
# TODO: where is the discussion that documents the reason for this?
subarr = np.array(data, copy=copy)

elif dtype is None:
if dtype is None:
subarr = data
if data.dtype == object:
subarr = maybe_infer_to_datetimelike(data)
Expand Down Expand Up @@ -629,27 +599,8 @@ def sanitize_array(
subarr = np.array([], dtype=np.float64)

elif dtype is not None:
try:
subarr = _try_cast(data, dtype, copy)
except ValueError:
if is_integer_dtype(dtype):
if strict_ints:
raise
casted = np.array(data, copy=False)
if casted.dtype.kind == "f":
# GH#40110 match the behavior we have if we passed
# a ndarray[float] to begin with
return sanitize_array(
casted,
index,
dtype,
copy=False,
allow_2d=allow_2d,
)
else:
raise
else:
raise
subarr = _try_cast(data, dtype, copy)

else:
subarr = maybe_convert_platform(data)
if subarr.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def __new__(
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
arr = sanitize_array(data, None, dtype=dtype, copy=copy)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2706,11 +2706,12 @@ def test_floating_values_integer_dtype(self):

arr = np.random.randn(10, 5)

# as of 2.0, we match Series behavior by retaining float dtype instead
# of doing a lossy conversion here. Below we _do_ do the conversion
# since it is lossless.
df = DataFrame(arr, dtype="i8")
assert (df.dtypes == "f8").all()
# GH#49599 in 2.0 we raise instead of either
# a) silently ignoring dtype and returning float (the old Series behavior) or
# b) rounding (the old DataFrame behavior)
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
DataFrame(arr, dtype="i8")

df = DataFrame(arr.round(), dtype="i8")
assert (df.dtypes == "i8").all()
Expand Down
33 changes: 21 additions & 12 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,11 +801,13 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
# not clear if this is what we want long-term
expected = frame_or_series(arr)

res = frame_or_series(arr, dtype="i8")
tm.assert_equal(res, expected)
# GH#49599 as of 2.0 we raise instead of silently retaining float dtype
msg = "Trying to coerce float values to integer"
with pytest.raises(ValueError, match=msg):
frame_or_series(arr, dtype="i8")

res = frame_or_series(list(arr), dtype="i8")
tm.assert_equal(res, expected)
with pytest.raises(ValueError, match=msg):
frame_or_series(list(arr), dtype="i8")

# pre-2.0, when we had NaNs, we silently ignored the integer dtype
arr[0] = np.nan
Expand All @@ -815,7 +817,12 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
with pytest.raises(IntCastingNaNError, match=msg):
frame_or_series(arr, dtype="i8")

with pytest.raises(IntCastingNaNError, match=msg):
exc = IntCastingNaNError
if frame_or_series is Series:
# TODO: try to align these
exc = ValueError
msg = "cannot convert float NaN to integer"
with pytest.raises(exc, match=msg):
# same behavior if we pass list instead of the ndarray
frame_or_series(list(arr), dtype="i8")

Expand All @@ -833,13 +840,14 @@ def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
# see gh-15832
# Updated: make sure we treat this list the same as we would treat
# the equivalent ndarray
# GH#49599 pre-2.0 we silently retained float dtype, in 2.0 we raise
vals = [1, 2, 3.5]

res = Series(vals, dtype=any_int_numpy_dtype)
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
tm.assert_series_equal(res, expected)
alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd
tm.assert_series_equal(alt, expected)
msg = "Trying to coerce float values to integer"
with pytest.raises(ValueError, match=msg):
Series(vals, dtype=any_int_numpy_dtype)
with pytest.raises(ValueError, match=msg):
Series(np.array(vals), dtype=any_int_numpy_dtype)

def test_constructor_coerce_float_valid(self, float_numpy_dtype):
s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
Expand All @@ -853,9 +861,10 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
vals = [1, 2, np.nan]
# pre-2.0 this would return with a float dtype, in 2.0 we raise

msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
msg = "cannot convert float NaN to integer"
with pytest.raises(ValueError, match=msg):
Series(vals, dtype=any_int_numpy_dtype)
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
Series(np.array(vals), dtype=any_int_numpy_dtype)

Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
expected = Series(arr)
tm.assert_series_equal(res, expected)

res = Series(darr, dtype="i8")
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)
# GH#49599 in 2.0 we raise instead of silently ignoring the dtype
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
Series(darr, dtype="i8")

msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
arr[2] = np.nan
Expand Down