Skip to content

Commit ad9d42a

Browse files
authored
BUG: Series(floatlike, dtype=intlike) inconsistent with non-ndarray data (#45142)
1 parent b17cedd commit ad9d42a

File tree

4 files changed

+70
-10
lines changed

4 files changed

+70
-10
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ Numeric
137137

138138
Conversion
139139
^^^^^^^^^^
140-
-
140+
- Bug in constructing a :class:`Series` with an integer dtype from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) raising instead of casting as it would for an ``np.ndarray`` (:issue:`40110`)
141141
-
142142

143143
Strings

pandas/core/construction.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,23 @@ def sanitize_array(
588588
data = list(data)
589589

590590
if dtype is not None or len(data) == 0:
591-
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
591+
try:
592+
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
593+
except ValueError:
594+
casted = np.array(data, copy=False)
595+
if casted.dtype.kind == "f" and is_integer_dtype(dtype):
596+
# GH#40110 match the behavior we have if we passed
597+
# an ndarray[float] to begin with
598+
return sanitize_array(
599+
casted,
600+
index,
601+
dtype,
602+
copy=False,
603+
raise_cast_failure=raise_cast_failure,
604+
allow_2d=allow_2d,
605+
)
606+
else:
607+
raise
592608
else:
593609
subarr = maybe_convert_platform(data)
594610
if subarr.dtype == object:

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1666,7 +1666,7 @@ def maybe_cast_to_integer_array(
16661666
16671667
Also, if you try to coerce float values to integers, it raises:
16681668
1669-
>>> pd.Series([1, 2, 3.5], dtype="int64")
1669+
>>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
16701670
Traceback (most recent call last):
16711671
...
16721672
ValueError: Trying to coerce float values to integers

pandas/tests/series/test_constructors.py

+51-7
Original file line numberDiff line numberDiff line change
@@ -754,23 +754,67 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype)
754754
with pytest.raises(OverflowError, match=msg):
755755
Series([-1], dtype=any_unsigned_int_numpy_dtype)
756756

757+
@td.skip_if_no("dask")
758+
def test_construct_dask_float_array_int_dtype_match_ndarray(self):
759+
# GH#40110 make sure we treat a float-dtype dask array with the same
760+
# rules we would for an ndarray
761+
import dask.dataframe as dd
762+
763+
arr = np.array([1, 2.5, 3])
764+
darr = dd.from_array(arr)
765+
766+
res = Series(darr)
767+
expected = Series(arr)
768+
tm.assert_series_equal(res, expected)
769+
770+
res = Series(darr, dtype="i8")
771+
expected = Series(arr, dtype="i8")
772+
tm.assert_series_equal(res, expected)
773+
774+
msg = "In a future version, passing float-dtype values containing NaN"
775+
arr[2] = np.nan
776+
with tm.assert_produces_warning(FutureWarning, match=msg):
777+
res = Series(darr, dtype="i8")
778+
with tm.assert_produces_warning(FutureWarning, match=msg):
779+
expected = Series(arr, dtype="i8")
780+
tm.assert_series_equal(res, expected)
781+
757782
def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
758783
# see gh-15832
759-
msg = "Trying to coerce float values to integers"
760-
with pytest.raises(ValueError, match=msg):
761-
Series([1, 2, 3.5], dtype=any_int_numpy_dtype)
784+
# Updated: make sure we treat this list the same as we would treat
785+
# the equivalent ndarray
786+
vals = [1, 2, 3.5]
787+
788+
res = Series(vals, dtype=any_int_numpy_dtype)
789+
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
790+
tm.assert_series_equal(res, expected)
791+
alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd
792+
tm.assert_series_equal(alt, expected)
762793

763794
def test_constructor_coerce_float_valid(self, float_numpy_dtype):
764795
s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
765796
expected = Series([1, 2, 3.5]).astype(float_numpy_dtype)
766797
tm.assert_series_equal(s, expected)
767798

768-
def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype):
799+
def test_constructor_invalid_coerce_ints_with_float_nan(
800+
self, any_int_numpy_dtype, request
801+
):
769802
# GH 22585
803+
# Updated: make sure we treat this list the same as we would treat the
804+
# equivalent ndarray
805+
if np_version_under1p19 and np.dtype(any_int_numpy_dtype).kind == "u":
806+
mark = pytest.mark.xfail(reason="Produces an extra RuntimeWarning")
807+
request.node.add_marker(mark)
770808

771-
msg = "cannot convert float NaN to integer"
772-
with pytest.raises(ValueError, match=msg):
773-
Series([1, 2, np.nan], dtype=any_int_numpy_dtype)
809+
vals = [1, 2, np.nan]
810+
811+
msg = "In a future version, passing float-dtype values containing NaN"
812+
with tm.assert_produces_warning(FutureWarning, match=msg):
813+
res = Series(vals, dtype=any_int_numpy_dtype)
814+
with tm.assert_produces_warning(FutureWarning, match=msg):
815+
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
816+
tm.assert_series_equal(res, expected)
817+
assert np.isnan(expected.iloc[-1])
774818

775819
def test_constructor_dtype_no_cast(self):
776820
# see gh-1572

0 commit comments

Comments
 (0)