diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 96b4ab7f3fbc6..2dc63fb97ed54 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,8 +21,8 @@ is_extension_array_dtype, is_extension_type, is_float_dtype, is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype) from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPandasArray, - ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) + ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, ABCSeries, + ABCTimedeltaIndex) from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com @@ -570,59 +570,40 @@ def sanitize_array(data, index, dtype=None, copy=False, else: data = data.copy() + # extract ndarray or ExtensionArray, ensure we have no PandasArray data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): - if dtype is not None: - subarr = np.array(data, copy=False) - + if (dtype is not None + and is_float_dtype(data.dtype) and is_integer_dtype(dtype)): # possibility of nan -> garbage - if is_float_dtype(data.dtype) and is_integer_dtype(dtype): - try: - subarr = _try_cast(data, True, dtype, copy, - True) - except ValueError: - if copy: - subarr = data.copy() - else: - subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) - elif isinstance(data, Index): - # don't coerce Index types - # e.g. indexes can have different conversions (so don't fast path - # them) - # GH#6140 - subarr = sanitize_index(data, index, copy=copy) + try: + subarr = _try_cast(data, dtype, copy, True) + except ValueError: + if copy: + subarr = data.copy() + else: + subarr = np.array(data, copy=False) else: - # we will try to copy be-definition here - subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): - if isinstance(data, ABCPandasArray): - # We don't want to let people put our PandasArray wrapper - # (the output of Series/Index.array), into a Series. So - # we explicitly unwrap it here. - subarr = data.to_numpy() - else: - subarr = data - - # everything else in this block must also handle ndarray's, - # because we've unwrapped PandasArray into an ndarray. + # it is already ensured above this is not a PandasArray + subarr = data if dtype is not None: - subarr = data.astype(dtype) - - if copy: - subarr = data.copy() + subarr = subarr.astype(dtype, copy=copy) + elif copy: + subarr = subarr.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: - subarr = _try_cast(data, False, dtype, copy, - raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise @@ -637,9 +618,9 @@ def sanitize_array(data, index, dtype=None, copy=False, elif isinstance(data, range): # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype='int64') - subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure) + subarr = _try_cast(arr, dtype, copy, raise_cast_failure) else: - subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: @@ -698,10 +679,22 @@ def sanitize_array(data, index, dtype=None, copy=False, return subarr -def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): - +def _try_cast(arr, dtype, copy, raise_cast_failure): + """ + Convert input to numpy ndarray and optionally cast to a given dtype. + + Parameters + ---------- + arr : array-like + dtype : np.dtype, ExtensionDtype or None + copy : bool + If False, don't copy the data if not needed. + raise_cast_failure : bool + If True, and if a dtype is specified, raise errors during casting. + Otherwise an object array is returned. + """ # perf shortcut as this is the most common case - if take_fast_path: + if isinstance(arr, np.ndarray): if maybe_castable(arr) and not copy and dtype is None: return arr