diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 75ba169600962..12b0d90e68ab9 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -830,6 +830,7 @@ Conversion
 - Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
 - Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
 - Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
+- Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`)
 - Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
 - Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`)
 -
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 6250c298f291f..7e954b3d1d1ec 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -206,17 +206,17 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
         Construct a new ExtensionArray from a sequence of scalars.
         """
         pa_dtype = to_pyarrow_type(dtype)
-        is_cls = isinstance(scalars, cls)
-        if is_cls or isinstance(scalars, (pa.Array, pa.ChunkedArray)):
-            if is_cls:
-                scalars = scalars._data
-            if pa_dtype:
-                scalars = scalars.cast(pa_dtype)
-            return cls(scalars)
-        else:
-            return cls(
-                pa.chunked_array(pa.array(scalars, type=pa_dtype, from_pandas=True))
-            )
+        if isinstance(scalars, cls):
+            scalars = scalars._data
+        elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):
+            try:
+                scalars = pa.array(scalars, type=pa_dtype, from_pandas=True)
+            except pa.ArrowInvalid:
+                # GH50430: let pyarrow infer type, then cast
+                scalars = pa.array(scalars, from_pandas=True)
+        if pa_dtype:
+            scalars = scalars.cast(pa_dtype)
+        return cls(scalars)
 
     @classmethod
     def _from_sequence_of_strings(
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 9b42b86efd0d0..37abdefa25f6e 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1471,6 +1471,14 @@ def test_astype_from_non_pyarrow(data):
     tm.assert_extension_array_equal(result, data)
 
 
+def test_astype_float_from_non_pyarrow_str():
+    # GH50430
+    ser = pd.Series(["1.0"])
+    result = ser.astype("float64[pyarrow]")
+    expected = pd.Series([1.0], dtype="float64[pyarrow]")
+    tm.assert_series_equal(result, expected)
+
+
 def test_to_numpy_with_defaults(data):
     # GH49973
     result = data.to_numpy()