From d3b63388442c7d67c5fb0fc18084c0be52797423 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 07:50:25 -0700 Subject: [PATCH 01/11] put slower isinstance checks after faster c-space checks --- pandas/_libs/lib.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 27ee685acfde7..3e7dab65f52a9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -157,13 +157,13 @@ def is_scalar(val: object) -> bool: return (cnp.PyArray_IsAnyScalar(val) # PyArray_IsAnyScalar is always False for bytearrays on Py3 - or isinstance(val, (Fraction, Number)) - # We differ from numpy, which claims that None is not scalar; - # see np.isscalar - or val is None or PyDate_Check(val) or PyDelta_Check(val) or PyTime_Check(val) + # We differ from numpy, which claims that None is not scalar; + # see np.isscalar + or val is None + or isinstance(val, (Fraction, Number)) or util.is_period_object(val) or is_decimal(val) or is_interval(val) From 02f86541740b6265120062078272e2a720acdd5b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 10:26:39 -0700 Subject: [PATCH 02/11] simplify runtime imports --- pandas/core/algorithms.py | 8 +------- pandas/core/arrays/categorical.py | 11 +++-------- pandas/core/arrays/integer.py | 3 +-- pandas/core/arrays/interval.py | 5 +++-- pandas/core/arrays/numpy_.py | 9 ++------- pandas/core/arrays/sparse.py | 10 ---------- pandas/core/dtypes/missing.py | 4 ++-- pandas/util/_validators.py | 2 +- 8 files changed, 13 insertions(+), 39 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2c38e071d3d44..0cfa6df99ff97 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1977,12 +1977,6 @@ def diff(arr, n, axis=0): out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] if is_timedelta: - from pandas import TimedeltaIndex - - out_arr = ( - TimedeltaIndex(out_arr.ravel().astype("int64")) - .asi8.reshape(out_arr.shape) - .astype("timedelta64[ns]") - ) + out_arr = out_arr.astype("int64").view("timedelta64[ns]") return out_arr diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index df5cd12a479f0..c4458d7a4dc86 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -6,7 +6,7 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, lib +from pandas._libs import algos as libalgos, hashtable as htable, lib from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -1527,9 +1527,7 @@ def value_counts(self, dropna=True): See Also -------- Series.value_counts - """ - from numpy import bincount from pandas import Series, CategoricalIndex code, cat = self._codes, self.categories @@ -1538,9 +1536,9 @@ def value_counts(self, dropna=True): if dropna or clean: obs = code if clean else code[mask] - count = bincount(obs, minlength=ncat or 0) + count = np.bincount(obs, minlength=ncat or 0) else: - count = bincount(np.where(mask, code, ncat)) + count = np.bincount(np.where(mask, code, ncat)) ix = np.append(ix, -1) ix = self._constructor(ix, dtype=self.dtype, fastpath=True) @@ -2329,9 +2327,6 @@ def mode(self, dropna=True): ------- modes : `Categorical` (sorted) """ - - import pandas._libs.hashtable as htable - codes = self._codes if dropna: good = self._codes != -1 diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 867122964fe59..62b1a8a184946 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -25,6 +25,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core import nanops, ops +from pandas.core.algorithms import take from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.tools.numeric import to_numeric @@ -420,8 +421,6 @@ def __iter__(self): yield self._data[i] def take(self, indexer, allow_fill=False, fill_value=None): - from pandas.api.extensions import take - # we always fill with 1 internally # to avoid upcasting data_fill_value = 1 if isna(fill_value) else fill_value diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index a0319fe96896a..7f1aad3ba3261 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -25,6 +25,7 @@ from pandas.core.dtypes.dtypes import IntervalDtype from pandas.core.dtypes.generic import ( ABCDatetimeIndex, + ABCIndexClass, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, @@ -35,7 +36,7 @@ from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.indexes.base import Index, ensure_index +from pandas.core.indexes.base import ensure_index _VALID_CLOSED = {"left", "right", "both", "neither"} _interval_shared_docs = {} @@ -510,7 +511,7 @@ def __getitem__(self, value): right = self.right[value] # scalar - if not isinstance(left, Index): + if not isinstance(left, ABCIndexClass): if isna(left): return self._fill_value return Interval(left, right, self.closed) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9f428a4ac10b2..77c9a3bc98690 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -11,10 +11,11 @@ from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.inference import is_array_like, is_list_like +from pandas.core.dtypes.missing import isna from pandas import compat from pandas.core import nanops -from pandas.core.algorithms import searchsorted +from pandas.core.algorithms import searchsorted, take, unique from pandas.core.missing import backfill_1d, pad_1d from .base import ExtensionArray, ExtensionOpsMixin @@ -249,8 +250,6 @@ def nbytes(self): return self._ndarray.nbytes def isna(self): - from pandas import isna - return isna(self._ndarray) def fillna(self, value=None, method=None, limit=None): @@ -281,8 +280,6 @@ def fillna(self, value=None, method=None, limit=None): return new_values def take(self, indices, allow_fill=False, fill_value=None): - from pandas.core.algorithms import take - result = take( self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value ) @@ -298,8 +295,6 @@ def _values_for_factorize(self): return self._ndarray, -1 def unique(self): - from pandas import unique - return type(self)(unique(self._ndarray)) # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 65976021f5053..9376b49112f6f 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -105,8 +105,6 @@ class SparseDtype(ExtensionDtype): _metadata = ("_dtype", "_fill_value", "_is_na_fill_value") def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: - from pandas.core.dtypes.missing import na_value_for_dtype - from pandas.core.dtypes.common import pandas_dtype, is_string_dtype, is_scalar if isinstance(dtype, type(self)): if fill_value is None: @@ -178,20 +176,14 @@ def fill_value(self): @property def _is_na_fill_value(self): - from pandas.core.dtypes.missing import isna - return isna(self.fill_value) @property def _is_numeric(self): - from pandas.core.dtypes.common import is_object_dtype - return not is_object_dtype(self.subtype) @property def _is_boolean(self): - from pandas.core.dtypes.common import is_bool_dtype - return is_bool_dtype(self.subtype) @property @@ -928,8 +920,6 @@ def values(self): return self.to_dense() def isna(self): - from pandas import isna - # If null fill value, we want SparseDtype[bool, true] # to preserve the same memory usage. dtype = SparseDtype(bool, self._null_fill_value) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index bea73d72b91c9..6f599a6be6021 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -3,6 +3,8 @@ """ import numpy as np +from pandas._config import get_option + from pandas._libs import lib import pandas._libs.missing as libmissing from pandas._libs.tslibs import NaT, iNaT @@ -203,8 +205,6 @@ def _use_inf_as_na(key): * http://stackoverflow.com/questions/4859217/ programmatically-creating-variables-in-python/4859312#4859312 """ - from pandas._config import get_option - flag = get_option(key) if flag: globals()["_isna"] = _isna_old diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 8d5f9f7749682..ffab6d016a887 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,7 +4,7 @@ """ import warnings -from pandas.core.dtypes.common import is_bool +from pandas._libs.lib import is_bool def _check_arg_length(fname, args, max_fname_arg_count, compat_args): From b14a6f27c2adacbcc03cacad5b66d8ab7f707002 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 11:42:07 -0700 Subject: [PATCH 03/11] remove unnecessary imports --- pandas/compat/pickle_compat.py | 1 - pandas/core/reshape/merge.py | 3 --- pandas/core/reshape/reshape.py | 5 ++--- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 0934d8529fdf7..bca33513b0069 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -6,7 +6,6 @@ import pickle as pkl import sys -import pandas # noqa from pandas import Index diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c1a07c129f7cd..3a2e6a8b7ff62 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1292,8 +1292,6 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how="inner", **kwargs) indexers into the left_keys, right_keys """ - from functools import partial - assert len(left_keys) == len( right_keys ), "left_key and right_keys must be the same length" @@ -1767,7 +1765,6 @@ def flip(xs): def _get_multiindex_indexer(join_keys, index, sort): - from functools import partial # bind `sort` argument fkeys = partial(_factorize_keys, sort=sort) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 540a06caec220..4bb6c26a5c854 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -855,7 +855,6 @@ def get_dummies( 2 0.0 0.0 1.0 """ from pandas.core.reshape.concat import concat - from itertools import cycle dtypes_to_encode = ["object", "category"] @@ -884,7 +883,7 @@ def check_len(item, name): check_len(prefix_sep, "prefix_sep") if isinstance(prefix, str): - prefix = cycle([prefix]) + prefix = itertools.cycle([prefix]) if isinstance(prefix, dict): prefix = [prefix[col] for col in data_to_encode.columns] @@ -893,7 +892,7 @@ def check_len(item, name): # validate separators if isinstance(prefix_sep, str): - prefix_sep = cycle([prefix_sep]) + prefix_sep = itertools.cycle([prefix_sep]) elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] From b6efc2bb335f6fef0467ec4b0b4bfe90da69cea2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 12:06:46 -0700 Subject: [PATCH 04/11] remove duplicate import --- pandas/core/arrays/categorical.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c4458d7a4dc86..f03ea1078d7d8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2717,8 +2717,6 @@ def _recode_for_categories(codes, old_categories, new_categories): >>> _recode_for_categories(codes, old_cat, new_cat) array([ 1, 0, 0, -1]) """ - from pandas.core.algorithms import take_1d - if len(old_categories) == 0: # All null anyway, so just retain the nulls return codes.copy() From f60506bd032964bf95247bfe41471ab1597ca558 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 12:08:37 -0700 Subject: [PATCH 05/11] dont do imports at runtime --- pandas/core/arrays/categorical.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f03ea1078d7d8..29fbcecd40b43 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -50,7 +50,14 @@ from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms -from pandas.core.algorithms import factorize, take, take_1d, unique1d +from pandas.core.algorithms import ( + factorize, + take, + take_1d, + unique1d, + _get_data_algo, + _hashtables, +) from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com from pandas.core.missing import interpolate_2d @@ -2666,8 +2673,6 @@ def _get_codes_for_values(values, categories): """ utility routine to turn values into codes given the specified categories """ - from pandas.core.algorithms import _get_data_algo, _hashtables - dtype_equal = is_dtype_equal(values.dtype, categories.dtype) if dtype_equal: From 65c0aa2e06ff75da93c4155c0cf6ea922edc96ee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 14:18:13 -0700 Subject: [PATCH 06/11] cleanup --- pandas/core/dtypes/cast.py | 2 +- pandas/core/dtypes/dtypes.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 44a3fefb1689a..220a02f2ca35b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1368,7 +1368,7 @@ def maybe_cast_to_integer_array(arr, dtype, copy=False): arr = np.asarray(arr) if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): - raise OverflowError("Trying to coerce negative values " "to unsigned integers") + raise OverflowError("Trying to coerce negative values to unsigned integers") if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)): raise ValueError("Trying to coerce float values to integers") diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6728d048efb79..35cf6d9a5ab26 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass from .base import ExtensionDtype -from .inference import is_list_like +from .inference import is_list_like, is_bool str_type = str @@ -490,8 +490,6 @@ def validate_ordered(ordered: OrderedType) -> None: TypeError If 'ordered' is not a boolean. """ - from pandas.core.dtypes.common import is_bool - if not is_bool(ordered): raise TypeError("'ordered' must either be 'True' or 'False'") From 4843eb0534992c924eead5d3d36645b5626069c9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 17:23:26 -0700 Subject: [PATCH 07/11] simplify imports, avoid runtime imports --- pandas/_libs/lib.pyx | 6 +++--- pandas/core/base.py | 7 +------ pandas/core/common.py | 1 - pandas/core/dtypes/concat.py | 4 ++-- pandas/core/dtypes/dtypes.py | 4 ++-- pandas/core/nanops.py | 12 +++++------- pandas/core/resample.py | 21 ++++++++++----------- pandas/core/strings.py | 28 +++++++++++++++------------- 8 files changed, 38 insertions(+), 45 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3e7dab65f52a9..e32c3e0207187 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1236,7 +1236,9 @@ def infer_dtype(value: object, skipna: object=None) -> str: # e.g. categoricals try: values = getattr(value, '_values', getattr(value, 'values', value)) - except: + except TypeError: + # This gets hit if we have an EA, since cython expects `values` + # to be an ndarray value = _try_infer_map(value) if value is not None: return value @@ -1252,8 +1254,6 @@ def infer_dtype(value: object, skipna: object=None) -> str: construct_1d_object_array_from_listlike) values = construct_1d_object_array_from_listlike(value) - values = getattr(values, 'values', values) - # make contiguous values = values.ravel() diff --git a/pandas/core/base.py b/pandas/core/base.py index 9480e2e425f79..f7b3fe723c28c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -32,6 +32,7 @@ from pandas.core import algorithms, common as com from pandas.core.accessor import DirNamesMixin +from pandas.core.algorithms import duplicated, unique1d, value_counts from pandas.core.arrays import ExtensionArray import pandas.core.nanops as nanops @@ -1381,8 +1382,6 @@ def value_counts( 1.0 1 dtype: int64 """ - from pandas.core.algorithms import value_counts - result = value_counts( self, sort=sort, @@ -1400,8 +1399,6 @@ def unique(self): result = values.unique() else: - from pandas.core.algorithms import unique1d - result = unique1d(values) return result @@ -1631,8 +1628,6 @@ def drop_duplicates(self, keep="first", inplace=False): return result def duplicated(self, keep="first"): - from pandas.core.algorithms import duplicated - if isinstance(self, ABCIndexClass): if self.is_unique: return np.zeros(len(self), dtype=np.bool) diff --git a/pandas/core/common.py b/pandas/core/common.py index d2dd0d03d9425..f9a19291b8ad9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -254,7 +254,6 @@ def asarray_tuplesafe(values, dtype=None): if result.ndim == 2: # Avoid building an array of arrays: - # TODO: verify whether any path hits this except #18819 (invalid) values = [tuple(x) for x in values] result = construct_1d_object_array_from_listlike(values) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ac74ad5726a99..d047ff5eb7747 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -362,7 +362,7 @@ def _maybe_unwrap(x): if sort_categories and not ignore_order and ordered: raise TypeError( - "Cannot use sort_categories=True with " "ordered Categoricals" + "Cannot use sort_categories=True with ordered Categoricals" ) if sort_categories and not categories.is_monotonic_increasing: @@ -386,7 +386,7 @@ def _maybe_unwrap(x): else: # ordered - to show a proper error message if all(c.ordered for c in to_union): - msg = "to union ordered Categoricals, " "all categories must be the same" + msg = "to union ordered Categoricals, all categories must be the same" raise TypeError(msg) else: raise TypeError("Categorical.ordered must be the same") diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 35cf6d9a5ab26..ec3fb7b2b3caf 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -149,7 +149,7 @@ def __repr__(self) -> str_type: return str(self) def __hash__(self) -> int: - raise NotImplementedError("sub-classes should implement an __hash__ " "method") + raise NotImplementedError("sub-classes should implement an __hash__ method") def __getstate__(self) -> Dict[str_type, Any]: # pickle support; we don't want to pickle the cache @@ -320,7 +320,7 @@ def _from_values_or_dtype( raise ValueError(msg.format(dtype=dtype)) elif categories is not None or ordered is not None: raise ValueError( - "Cannot specify `categories` or `ordered` " "together with `dtype`." + "Cannot specify `categories` or `ordered` together with `dtype`." ) elif is_categorical(values): # If no "dtype" was passed, use the one from "values", but honor diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ce14cb22a88ce..d3dacee0468c6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -33,8 +33,6 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna -import pandas.core.common as com - bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False @@ -281,12 +279,12 @@ def _get_values( mask = _maybe_get_mask(values, skipna, mask) if is_datetime64tz_dtype(values): - # com.values_from_object returns M8[ns] dtype instead of tz-aware, + # lib.values_from_object returns M8[ns] dtype instead of tz-aware, # so this case must be handled separately from the rest dtype = values.dtype values = getattr(values, "_values", values) else: - values = com.values_from_object(values) + values = lib.values_from_object(values) dtype = values.dtype if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): @@ -742,7 +740,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): >>> nanops.nanvar(s) 1.0 """ - values = com.values_from_object(values) + values = lib.values_from_object(values) dtype = values.dtype mask = _maybe_get_mask(values, skipna, mask) if is_any_int_dtype(values): @@ -943,7 +941,7 @@ def nanskew(values, axis=None, skipna=True, mask=None): >>> nanops.nanskew(s) 1.7320508075688787 """ - values = com.values_from_object(values) + values = lib.values_from_object(values) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") @@ -1022,7 +1020,7 @@ def nankurt(values, axis=None, skipna=True, mask=None): >>> nanops.nankurt(s) -1.2892561983471076 """ - values = com.values_from_object(values) + values = lib.values_from_object(values) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b4a3e6ed71bf4..21f752fada04b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -7,7 +7,7 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import NaT, Timestamp +from pandas._libs.tslibs import NaT, Period, Timestamp from pandas._libs.tslibs.frequencies import is_subperiod, is_superperiod from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.compat.numpy import function as nv @@ -16,7 +16,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -import pandas as pd import pandas.core.algorithms as algos from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin @@ -25,7 +24,7 @@ from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.core.indexes.datetimes import DatetimeIndex, date_range -from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex, period_range from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range from pandas.tseries.frequencies import to_offset @@ -138,7 +137,7 @@ def _typ(self): """ Masquerade for compat as a Series or a DataFrame. """ - if isinstance(self._selected_obj, pd.Series): + if isinstance(self._selected_obj, ABCSeries): return "series" return "dataframe" @@ -860,7 +859,9 @@ def size(self): # a copy of 0-len objects. GH14962 result = self._downsample("size") if not len(self.ax) and isinstance(self._selected_obj, ABCDataFrame): - result = pd.Series([], index=result.index, dtype="int64") + from pandas import Series + + result = Series([], index=result.index, dtype="int64") return result def quantile(self, q=0.5, **kwargs): @@ -1563,9 +1564,7 @@ def _get_time_period_bins(self, ax): binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name) return binner, [], labels - labels = binner = pd.period_range( - start=ax[0], end=ax[-1], freq=freq, name=ax.name - ) + labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name) end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp() if ax.tzinfo: @@ -1608,11 +1607,11 @@ def _get_period_bins(self, ax): ) # Get offset for bin edge (not label edge) adjustment - start_offset = pd.Period(start, self.freq) - pd.Period(p_start, self.freq) + start_offset = Period(start, self.freq) - Period(p_start, self.freq) bin_shift = start_offset.n % freq_mult start = p_start - labels = binner = pd.period_range( + labels = binner = period_range( start=start, end=end, freq=self.freq, name=ax.name ) @@ -1732,7 +1731,7 @@ def _get_period_range_edges(first, last, offset, closed="left", base=0): ------- A tuple of length 2, containing the adjusted pd.Period objects. """ - if not all(isinstance(obj, pd.Period) for obj in [first, last]): + if not all(isinstance(obj, Period) for obj in [first, last]): raise TypeError("'first' and 'last' must be instances of type Period") # GH 23882 diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 7c293ca4e50b0..161840bda0806 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -763,7 +763,7 @@ def _str_extract_noexpand(arr, pat, flags=0): Index. """ - from pandas import DataFrame, Index + from pandas import DataFrame regex = re.compile(pat, flags=flags) groups_or_na = _groups_or_na_fun(regex) @@ -772,7 +772,7 @@ def _str_extract_noexpand(arr, pat, flags=0): result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) name = _get_single_group_name(regex) else: - if isinstance(arr, Index): + if isinstance(arr, ABCIndexClass): raise ValueError("only one regex group is supported with Index") name = None names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -2003,7 +2003,7 @@ def _wrap_result( # infer from ndim if expand is not specified expand = result.ndim != 1 - elif expand is True and not isinstance(self._orig, Index): + elif expand is True and not isinstance(self._orig, ABCIndexClass): # required when expand=True is explicitly specified # not needed when inferred @@ -2036,7 +2036,7 @@ def cons_row(x): # Wait until we are sure result is a Series or Index before # checking attributes (GH 12180) - if isinstance(self._orig, Index): + if isinstance(self._orig, ABCIndexClass): # if result is a boolean np.array, return the np.array # instead of wrapping it into a boolean Index (GH 8875) if is_bool_dtype(result): @@ -2084,10 +2084,10 @@ def _get_series_list(self, others, ignore_index=False): # Once str.cat defaults to alignment, this function can be simplified; # will not need `ignore_index` and the second boolean output anymore - from pandas import Index, Series, DataFrame + from pandas import Series, DataFrame # self._orig is either Series or Index - idx = self._orig if isinstance(self._orig, Index) else self._orig.index + idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index err_msg = ( "others must be Series, Index, DataFrame, np.ndarray or " @@ -2099,14 +2099,14 @@ def _get_series_list(self, others, ignore_index=False): # `idx` of the calling Series/Index - i.e. must have matching length. # Objects with an index (i.e. Series/Index/DataFrame) keep their own # index, *unless* ignore_index is set to True. - if isinstance(others, Series): + if isinstance(others, ABCSeries): warn = not others.index.equals(idx) # only reconstruct Series when absolutely necessary los = [ Series(others.values, index=idx) if ignore_index and warn else others ] return (los, warn) - elif isinstance(others, Index): + elif isinstance(others, ABCIndexClass): warn = not others.equals(idx) los = [Series(others.values, index=(idx if ignore_index else others))] return (los, warn) @@ -2139,12 +2139,14 @@ def _get_series_list(self, others, ignore_index=False): # only allowing Series/Index/np.ndarray[1-dim] will greatly # simply this function post-deprecation. if not ( - isinstance(nxt, (Series, Index)) + isinstance(nxt, (Series, ABCIndexClass)) or (isinstance(nxt, np.ndarray) and nxt.ndim == 1) ): depr_warn = True - if not isinstance(nxt, (DataFrame, Series, Index, np.ndarray)): + if not isinstance( + nxt, (DataFrame, Series, ABCIndexClass, np.ndarray) + ): # safety for non-persistent list-likes (e.g. iterators) # do not map indexed/typed objects; info needed below nxt = list(nxt) @@ -2152,7 +2154,7 @@ def _get_series_list(self, others, ignore_index=False): # known types for which we can avoid deep inspection no_deep = ( isinstance(nxt, np.ndarray) and nxt.ndim == 1 - ) or isinstance(nxt, (Series, Index)) + ) or isinstance(nxt, (Series, ABCIndexClass)) # nested list-likes are forbidden: # -> elements of nxt must not be list-like is_legal = (no_deep and nxt.dtype == object) or all( @@ -2325,7 +2327,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): if sep is None: sep = "" - if isinstance(self._orig, Index): + if isinstance(self._orig, ABCIndexClass): data = Series(self._orig, index=self._orig) else: # Series data = self._orig @@ -2411,7 +2413,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): # no NaNs - can just concatenate result = cat_safe(all_cols, sep) - if isinstance(self._orig, Index): + if isinstance(self._orig, ABCIndexClass): # add dtype for case that result is all-NA result = Index(result, dtype=object, name=self._orig.name) else: # Series From 674e1386a9f2d83b42d21347a74a5e7086e1bf70 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 18:18:21 -0700 Subject: [PATCH 08/11] blackify --- pandas/core/dtypes/concat.py | 4 +--- pandas/tseries/offsets.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d047ff5eb7747..ee5aa88cf2907 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -361,9 +361,7 @@ def _maybe_unwrap(x): new_codes = np.concatenate(codes) if sort_categories and not ignore_order and ordered: - raise TypeError( - "Cannot use sort_categories=True with ordered Categoricals" - ) + raise TypeError("Cannot use sort_categories=True with ordered Categoricals") if sort_categories and not categories.is_monotonic_increasing: categories = categories.sort_values() diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ac3e92c772517..6d739cc83635d 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -9,6 +9,7 @@ from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, + Period, Timedelta, Timestamp, ccalendar, @@ -33,7 +34,6 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly -from pandas.core.dtypes.generic import ABCPeriod from pandas.core.dtypes.inference import is_list_like from pandas.core.tools.datetimes import to_datetime @@ -2539,7 +2539,7 @@ def __add__(self, other): return type(self)(self.n + other.n) else: return _delta_to_tick(self.delta + other.delta) - elif isinstance(other, ABCPeriod): + elif isinstance(other, Period): return other + self try: return self.apply(other) From ee229356f26c807df35be6095151b5b90b92e75e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 18:18:43 -0700 Subject: [PATCH 09/11] isort fixup --- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/dtypes/dtypes.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 29fbcecd40b43..0cbcbb1ce4ba4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -51,12 +51,12 @@ from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import ( + _get_data_algo, + _hashtables, factorize, take, take_1d, unique1d, - _get_data_algo, - _hashtables, ) from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ec3fb7b2b3caf..bba551bd30a2d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass from .base import ExtensionDtype -from .inference import is_list_like, is_bool +from .inference import is_bool, is_list_like str_type = str From 2eb9fd09ac79002f0cf28e690387cebb4e58b583 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 18 Jul 2019 21:07:09 -0700 Subject: [PATCH 10/11] more --- pandas/core/arrays/array_.py | 3 +-- pandas/core/arrays/datetimelike.py | 10 +++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index 93ee570c1f971..314144db57712 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -212,7 +212,6 @@ def array( """ from pandas.core.arrays import ( period_array, - ExtensionArray, IntervalArray, PandasArray, DatetimeArray, @@ -226,7 +225,7 @@ def array( data = extract_array(data, extract_numpy=True) - if dtype is None and isinstance(data, ExtensionArray): + if dtype is None and isinstance(data, ABCExtensionArray): dtype = data.dtype # this returns None for not-found dtypes. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index df17388856117..98a745582e11b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -34,7 +34,12 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCPeriodArray, + ABCSeries, +) from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna @@ -1664,11 +1669,10 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): i8 1d array """ from pandas import Index - from pandas.core.arrays import PeriodArray if lib.is_scalar(other) and isna(other): return iNaT - elif isinstance(other, (PeriodArray, ABCIndexClass, DatetimeLikeArrayMixin)): + elif isinstance(other, (ABCPeriodArray, ABCIndexClass, DatetimeLikeArrayMixin)): # convert tz if needed if getattr(other, "tz", None) is not None: if to_utc: From 0439f685a5e9d94121723decc7f015b8602f4366 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 20 Jul 2019 12:39:17 -0700 Subject: [PATCH 11/11] revert --- pandas/util/_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index ffab6d016a887..8d5f9f7749682 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,7 +4,7 @@ """ import warnings -from pandas._libs.lib import is_bool +from pandas.core.dtypes.common import is_bool def _check_arg_length(fname, args, max_fname_arg_count, compat_args):