From bbe248c473db80947650343d7ba15cfd0570f571 Mon Sep 17 00:00:00 2001 From: "Jonathan J. Helmus" Date: Fri, 13 Oct 2017 05:32:46 -0500 Subject: [PATCH 1/5] BUG: set tz on DTI from fixed format HDFStore (#17844) closes #17618 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/pytables.py | 7 +++++-- pandas/tests/io/test_pytables.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c90b9939ce16d..d6bdf153e0368 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -941,6 +941,7 @@ Indexing I/O ^^^ +- Bug in :func:`read_hdf` when reading a timezone aware index from ``fixed`` format HDFStore (:issue:`17618`) - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) - Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 39d088e00b219..2af2816167829 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2391,8 +2391,11 @@ def _alias_to_class(self, alias): def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): - return DatetimeIndex._simple_new(values, None, freq=freq, - tz=tz) + # data are already in UTC, localize and convert if tz present + result = DatetimeIndex._simple_new(values, None, freq=freq) + if tz is not None: + result = result.tz_localize('UTC').tz_convert(tz) + return result return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2fe3cf1f34d44..6e3e338ce3de3 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2272,6 +2272,17 @@ def test_calendar_roundtrip_issue(self): result = store.select('table') assert_series_equal(result, s) + def test_roundtrip_tz_aware_index(self): + # GH 17618 + time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern') + df = pd.DataFrame(data=[0], index=[time]) + + with ensure_clean_store(self.path) as store: + store.put('frame', df, format='fixed') + recons = store['frame'] + tm.assert_frame_equal(recons, df) + assert recons.index[0].value == 946706400000000000 + def test_append_with_timedelta(self): # GH 3577 # append timedelta From 44b08f2a852b0201b797e068bb4acf2ed004b94e Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 13 Oct 2017 05:32:10 -0600 Subject: [PATCH 2/5] CLN: Use pandas.core.common for None checks (#17816) --- pandas/core/common.py | 27 ++++++++++++++++++++------- pandas/core/generic.py | 12 +++++------- pandas/core/groupby.py | 28 +++++++++++++--------------- pandas/core/indexes/api.py | 2 +- pandas/core/indexes/base.py | 13 ++++++------- pandas/core/indexes/multi.py | 5 +++-- pandas/core/indexes/range.py | 3 ++- pandas/core/panel.py | 16 +++++++++------- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 2 +- pandas/core/series.py | 5 +++-- pandas/core/window.py | 7 +++---- pandas/io/formats/excel.py | 4 ++-- pandas/io/formats/format.py | 8 ++++---- pandas/io/formats/style.py | 7 +++---- pandas/io/json/table_schema.py | 3 ++- pandas/io/pytables.py | 6 +++--- pandas/plotting/_core.py | 6 +++--- pandas/tests/frame/test_to_csv.py | 3 ++- pandas/tests/util/test_util.py | 3 ++- pandas/util/testing.py | 3 ++- 21 files changed, 90 insertions(+), 75 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index e0dc420bc53f8..7b96700313012 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -223,17 +223,36 @@ def _mut_exclusive(**kwargs): def _not_none(*args): + """Returns a generator consisting of the arguments that are not None""" return (arg for arg in args if arg is not None) def _any_none(*args): + """Returns a boolean indicating if any argument is None""" for arg in args: if arg is None: return True return False +def _all_none(*args): + """Returns a boolean indicating if all arguments are None""" + for arg in args: + if arg is not None: + return False + return True + + +def _any_not_none(*args): + """Returns a boolean indicating if any argument is not None""" + for arg in args: + if arg is not None: + return True + return False + + def _all_not_none(*args): + """Returns a boolean indicating if all arguments are not None""" for arg in args: if arg is None: return False @@ -241,6 +260,7 @@ def _all_not_none(*args): def _count_not_none(*args): + """Returns the count of arguments that are not None""" return sum(x is not None for x in args) @@ -459,13 +479,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -def _all_none(*args): - for arg in args: - if arg is not None: - return False - return True - - def _where_compat(mask, arr1, arr2): if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE: new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index acc1bf1241bff..fc3982dba93ce 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,12 +28,10 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame - -from pandas.core.common import (_all_not_none, - _values_from_object, - _maybe_box_datetimelike, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) +from pandas.core.common import (_all_not_none, _count_not_none, + _maybe_box_datetimelike, _values_from_object, + AbstractMethodError, SettingWithCopyError, + SettingWithCopyWarning) from pandas.core.base import PandasObject, SelectionMixin from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -3252,7 +3250,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): """ import re - nkw = sum([x is not None for x in [items, like, regex]]) + nkw = _count_not_none(items, like, regex) if nkw > 1: raise TypeError('Keyword arguments `items`, `like`, or `regex` ' 'are mutually exclusive') diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ccaf90b4482a7..3b7d3685db3b7 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -39,7 +39,8 @@ from pandas.core.dtypes.missing import isna, notna, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, - _default_index) + _default_index, _not_none, _get_callable_name, + _asarray_tuplesafe) from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) @@ -60,7 +61,6 @@ from pandas.util._validators import validate_kwargs import pandas.core.algorithms as algorithms -import pandas.core.common as com from pandas.core.config import option_context from pandas.plotting._core import boxplot_frame_groupby @@ -877,10 +877,9 @@ def _concat_objects(self, keys, values, not_indexed_same=False): def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing - for v in values: - if v is not None: - ax = v._get_axis(self.axis) - ax._reset_identity() + for v in _not_none(*values): + ax = v._get_axis(self.axis) + ax._reset_identity() return values if not not_indexed_same: @@ -1806,7 +1805,7 @@ def apply(self, f, data, axis=0): group_keys = self._get_group_keys() # oh boy - f_name = com._get_callable_name(f) + f_name = _get_callable_name(f) if (f_name not in _plotting_methods and hasattr(splitter, 'fast_apply') and axis == 0): try: @@ -2533,7 +2532,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, self.grouper = self.obj[self.name] elif isinstance(self.grouper, (list, tuple)): - self.grouper = com._asarray_tuplesafe(self.grouper) + self.grouper = _asarray_tuplesafe(self.grouper) # a passed Categorical elif is_categorical_dtype(self.grouper): @@ -2739,7 +2738,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: - keys = [com._asarray_tuplesafe(keys)] + keys = [_asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: @@ -3028,7 +3027,7 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(f) else: # protect against callables without names - columns.append(com._get_callable_name(f)) + columns.append(_get_callable_name(f)) arg = lzip(columns, arg) results = {} @@ -3686,14 +3685,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): key_names = self.grouper.names # GH12824. - def first_non_None_value(values): + def first_not_none(values): try: - v = next(v for v in values if v is not None) + return next(_not_none(*values)) except StopIteration: return None - return v - v = first_non_None_value(values) + v = first_not_none(values) if v is None: # GH9684. If all values are None, then this will throw an error. @@ -3726,7 +3724,7 @@ def first_non_None_value(values): key_index = None # make Nones an empty object - v = first_non_None_value(values) + v = first_not_none(values) if v is None: return DataFrame() elif isinstance(v, NDFrame): diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d20a0b0a2c73d..08cda8a06ba64 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -123,7 +123,7 @@ def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = set([tuple(i.names) for i in indexes - if any(n is not None for n in i.names)]) + if com._any_not_none(*i.names)]) if len(consensus_names) == 1: return list(list(consensus_names)[0]) return [None] * indexes[0].nlevels diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df0e963e7628d..c3343f149005c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -42,16 +42,15 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.core.common import (is_bool_indexer, - _values_from_object, - _asarray_tuplesafe) +from pandas.core.common import (is_bool_indexer, _values_from_object, + _asarray_tuplesafe, _not_none, + _index_labels_to_array) from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList -import pandas.core.common as com import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos @@ -3168,8 +3167,8 @@ def _join_multi(self, other, how, return_indexers=True): other_is_mi = isinstance(other, MultiIndex) # figure out join names - self_names = [n for n in self.names if n is not None] - other_names = [n for n in other.names if n is not None] + self_names = _not_none(*self.names) + other_names = _not_none(*other.names) overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 @@ -3714,7 +3713,7 @@ def drop(self, labels, errors='raise'): ------- dropped : Index """ - labels = com._index_labels_to_array(labels) + labels = _index_labels_to_array(labels) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 06b208b4d174e..4cc59f5297058 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,7 +21,8 @@ is_scalar) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.common import (_values_from_object, +from pandas.core.common import (_any_not_none, + _values_from_object, is_bool_indexer, is_null_slice, is_true_slices) @@ -509,7 +510,7 @@ def _format_attrs(self): max_seq_items=False)), ('labels', ibase.default_pprint(self._labels, max_seq_items=False))] - if not all(name is None for name in self.names): + if _any_not_none(*self.names): attrs.append(('names', ibase.default_pprint(self.names))) if self.sortorder is not None: attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9f7bac641ae08..b2e55d4826670 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,6 +12,7 @@ from pandas import compat from pandas.compat import lrange, range from pandas.compat.numpy import function as nv +from pandas.core.common import _all_none from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat @@ -83,7 +84,7 @@ def _ensure_int(value, field): return new_value - if start is None and stop is None and step is None: + if _all_none(start, stop, step): msg = "RangeIndex(...) must be called with integers" raise TypeError(msg) elif start is None: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1f22cb49d0196..997dd9c8e0f67 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,13 +15,13 @@ is_string_like, is_scalar) from pandas.core.dtypes.missing import notna -import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import function as nv -from pandas.core.common import _try_sort, _default_index +from pandas.core.common import (_try_sort, _default_index, _all_not_none, + _any_not_none, _apply_if_callable) from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -166,7 +166,7 @@ def _init_data(self, data, copy, dtype, **kwargs): axes = None if isinstance(data, BlockManager): - if any(x is not None for x in passed_axes): + if _any_not_none(*passed_axes): axes = [x if x is not None else y for x, y in zip(passed_axes, data.axes)] mgr = data @@ -178,7 +178,7 @@ def _init_data(self, data, copy, dtype, **kwargs): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None - elif is_scalar(data) and all(x is not None for x in passed_axes): + elif is_scalar(data) and _all_not_none(*passed_axes): values = cast_scalar_to_array([len(x) for x in passed_axes], data, dtype=dtype) mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, @@ -279,7 +279,7 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): return cls(**d) def __getitem__(self, key): - key = com._apply_if_callable(key, self) + key = _apply_if_callable(key, self) if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) @@ -594,7 +594,7 @@ def _box_item_values(self, key, values): return self._constructor_sliced(values, **d) def __setitem__(self, key, value): - key = com._apply_if_callable(key, self) + key = _apply_if_callable(key, self) shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( @@ -616,7 +616,9 @@ def __setitem__(self, key, value): def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" - _unpickle = com._unpickle_array + from pandas.io.pickle import _unpickle_array + + _unpickle = _unpickle_array vals, items, major, minor = state items = _unpickle(items) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c54763f8ebde1..e2c02bd0e71fb 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -241,7 +241,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise ValueError('No objects to concatenate') if keys is None: - objs = [obj for obj in objs if obj is not None] + objs = list(com._not_none(*objs)) else: # #1649 clean_keys = [] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6bb6988a7442a..e409090e76944 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1550,4 +1550,4 @@ def _should_fill(lname, rname): def _any(x): - return x is not None and len(x) > 0 and any([y is not None for y in x]) + return x is not None and com._any_not_none(*x) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8499f8b55d2d0..76baa89f165d4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,7 +45,8 @@ SettingWithCopyError, _maybe_box_datetimelike, _dict_compat, - standardize_mapping) + standardize_mapping, + _any_none) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -713,7 +714,7 @@ def _get_with(self, key): def _get_values_tuple(self, key): # mpl hackaround - if any(k is None for k in key): + if _any_none(*key): return self._get_values(key) if not isinstance(self.index, MultiIndex): diff --git a/pandas/core/window.py b/pandas/core/window.py index e3a091573aa2f..5143dddc5e866 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -32,7 +32,7 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) -import pandas.core.common as com +from pandas.core.common import _asarray_tuplesafe, _count_not_none import pandas._libs.window as _window from pandas import compat @@ -535,7 +535,7 @@ def _prep_window(self, **kwargs): window = self._get_window() if isinstance(window, (list, tuple, np.ndarray)): - return com._asarray_tuplesafe(window).astype(float) + return _asarray_tuplesafe(window).astype(float) elif is_integer(window): import scipy.signal as sig @@ -1972,8 +1972,7 @@ def dataframe_from_int_dict(data, frame_template): def _get_center_of_mass(com, span, halflife, alpha): - valid_count = len([x for x in [com, span, halflife, alpha] - if x is not None]) + valid_count = _count_not_none(com, span, halflife, alpha) if valid_count > 1: raise ValueError("com, span, halflife, and alpha " "are mutually exclusive") diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 9e888c38edaa7..af24537cabf90 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -10,6 +10,7 @@ from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing +from pandas.core.common import _any_not_none from pandas.core.dtypes.common import is_float import pandas._libs.lib as lib from pandas import Index, MultiIndex, PeriodIndex @@ -548,8 +549,7 @@ def _format_hierarchical_rows(self): self.rowcounter += 1 # if index labels are not empty go ahead and dump - if (any(x is not None for x in index_labels) and - self.header is not False): + if _any_not_none(*index_labels) and self.header is not False: for cidx, name in enumerate(index_labels): yield ExcelCell(self.rowcounter - 1, cidx, name, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4608c3fe0ceb8..c5d4a0ecf44ab 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -26,6 +26,7 @@ is_list_like) from pandas.core.dtypes.generic import ABCSparseArray from pandas.core.base import PandasObject +from pandas.core.common import _any_not_none, sentinel_factory from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, u, @@ -36,7 +37,6 @@ _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths -import pandas.core.common as com import pandas._libs.lib as lib from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) @@ -1257,7 +1257,7 @@ def _column_header(): if self.fmt.sparsify: # GH3547 - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() else: sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, @@ -1426,7 +1426,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): if self.fmt.sparsify: # GH3547 - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) @@ -2352,7 +2352,7 @@ def single_row_table(row): # pragma: no cover def _has_names(index): if isinstance(index, MultiIndex): - return any([x is not None for x in index.names]) + return _any_not_none(*index.names) else: return index.name is not None diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d7677e3642c26..2e87b3b925edd 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -27,7 +27,7 @@ from pandas.compat import range from pandas.core.config import get_option from pandas.core.generic import _shared_docs -import pandas.core.common as com +from pandas.core.common import _any_not_none, sentinel_factory from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.util._decorators import Appender try: @@ -259,8 +259,7 @@ def format_attr(pair): row_es.append(es) head.append(row_es) - if self.data.index.names and not all(x is None - for x in self.data.index.names): + if self.data.index.names and _any_not_none(*self.data.index.names): index_header_row = [] for c, name in enumerate(self.data.index.names): @@ -1204,7 +1203,7 @@ def _get_level_lengths(index): Result is a dictionary of (level, inital_position): span """ - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() levels = index.format(sparsify=sentinel, adjoin=False, names=False) if index.nlevels == 1: diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index c3865afa9c0c0..9cec5b3d6ba49 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -3,6 +3,7 @@ http://specs.frictionlessdata.io/json-table-schema/ """ +from pandas.core.common import _all_not_none from pandas.core.dtypes.common import ( is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -61,7 +62,7 @@ def as_json_table_type(x): def set_default_names(data): """Sets index names to 'index' for regular, or 'level_x' for Multi""" - if all(name is not None for name in data.index.names): + if _all_not_none(*data.index.names): return data data = data.copy() diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2af2816167829..40955c50f6b5f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -34,7 +34,7 @@ from pandas.core.base import StringMixin from pandas.io.formats.printing import adjoin, pprint_thing from pandas.errors import PerformanceWarning -from pandas.core.common import _asarray_tuplesafe +from pandas.core.common import _asarray_tuplesafe, _all_none from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical, _factorize_from_iterables from pandas.core.internals import (BlockManager, make_block, @@ -905,7 +905,7 @@ def remove(self, key, where=None, start=None, stop=None): raise KeyError('No object named %s in the file' % key) # remove the node - if where is None and start is None and stop is None: + if _all_none(where, start, stop): s.group._f_remove(recursive=True) # delete from the table @@ -2363,7 +2363,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): support fully deleting the node in its entirety (only) - where specification must be None """ - if where is None and start is None and stop is None: + if _all_none(where, start, stop): self._handle.remove_node(self.group, recursive=True) return None diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 0d77b5f41a08e..ad3c4f0ecb05f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -20,7 +20,7 @@ is_iterator) from pandas.core.dtypes.generic import ABCSeries -from pandas.core.common import AbstractMethodError, _try_sort +from pandas.core.common import AbstractMethodError, _try_sort, _any_not_none from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex @@ -607,7 +607,7 @@ def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): def _get_index_name(self): if isinstance(self.data.index, MultiIndex): name = self.data.index.names - if any(x is not None for x in name): + if _any_not_none(*name): name = ','.join([pprint_thing(x) for x in name]) else: name = None @@ -955,7 +955,7 @@ def _make_plot(self): it = self._iter_data() stacking_id = self._get_stacking_id() - is_errorbar = any(e is not None for e in self.errors.values()) + is_errorbar = _any_not_none(*self.errors.values()) colors = self._get_colors() for i, (label, y) in enumerate(it): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index ab34ce877a726..a8449d6f874df 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -9,6 +9,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) +from pandas.core.common import _all_none from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) @@ -570,7 +571,7 @@ def _make_frame(names=None): df = _make_frame(True) df.to_csv(path, tupleize_cols=False, index=False) result = read_csv(path, header=[0, 1], tupleize_cols=False) - assert all([x is None for x in result.columns.names]) + assert _all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index ffc9703abff41..659ce36de6bab 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -8,6 +8,7 @@ import pytest from pandas.compat import intern +from pandas.core.common import _all_none from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util._decorators import deprecate_kwarg, make_signature from pandas.util._validators import (validate_args, validate_kwargs, @@ -437,7 +438,7 @@ def test_set_locale(self): pytest.skip("Only a single locale found, no point in " "trying to test setting another locale") - if all(x is None for x in self.current_locale): + if _all_none(*self.current_locale): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). pytest.skip("Current locale is not set.") diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3c23462e10d35..730d2782e85d2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,6 +33,7 @@ is_list_like) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d +from pandas.core.common import _all_not_none import pandas.compat as compat from pandas.compat import ( @@ -579,7 +580,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): except ValueError: yield new_locale else: - if all(lc is not None for lc in normalized_locale): + if _all_not_none(*normalized_locale): yield '.'.join(normalized_locale) else: yield new_locale From e1904354c7e2ca02d86b99d4a210a9629c866c0a Mon Sep 17 00:00:00 2001 From: Brian Tu Date: Mon, 21 Aug 2017 19:13:36 +0000 Subject: [PATCH 3/5] ENH: tolerance now takes list-like argument for reindex and get_indexer. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 17 +- pandas/core/indexes/base.py | 171 ++++++------------ pandas/core/indexes/datetimelike.py | 14 +- pandas/core/indexes/datetimes.py | 9 +- pandas/core/indexes/numeric.py | 21 ++- pandas/core/indexes/period.py | 24 ++- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/tools/timedeltas.py | 8 +- pandas/tests/frame/test_indexing.py | 9 + .../tests/indexes/datetimes/test_datetime.py | 22 ++- pandas/tests/indexes/period/test_period.py | 24 ++- pandas/tests/indexes/test_base.py | 78 +++++--- pandas/tests/indexes/test_numeric.py | 8 + .../indexes/timedeltas/test_timedelta.py | 10 +- pandas/tests/series/test_indexing.py | 8 + pandas/tests/sparse/test_indexing.py | 5 + 17 files changed, 264 insertions(+), 167 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d6bdf153e0368..e20a778d682d1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -234,6 +234,7 @@ Other Enhancements - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) - Improved the import time of pandas by about 2.25x. (:issue:`16764`) - :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) +- :func:`Series.reindex`, :func:`DataFrame.reindex`, :func:`Index.get_indexer` now support list-like argument for ``tolerance``. (:issue:`17367`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc3982dba93ce..5a311afc27c9a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2470,9 +2470,10 @@ def reindex_like(self, other, method=None, copy=True, limit=None, Maximum number of consecutive labels to fill for inexact matches. tolerance : optional Maximum distance between labels of the other object and this - object for inexact matches. + object for inexact matches. Can be list-like. .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Notes ----- @@ -2860,7 +2861,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, matches. The values of the index at the matching locations most satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Examples -------- @@ -3120,7 +3128,14 @@ def _reindex_multi(self, axes, copy, fill_value): matches. The values of the index at the matching locations most satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Examples -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3343f149005c..5729a00ffdd4b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7,7 +7,6 @@ algos as libalgos, join as libjoin, Timestamp, Timedelta, ) from pandas._libs.lib import is_datetime_array -from pandas._libs.tslibs import parsing from pandas.compat import range, u from pandas.compat.numpy import function as nv @@ -28,7 +27,6 @@ is_integer, is_float, is_dtype_equal, - is_dtype_union_equal, is_object_dtype, is_categorical_dtype, is_interval_dtype, @@ -42,22 +40,23 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.core.common import (is_bool_indexer, _values_from_object, - _asarray_tuplesafe, _not_none, - _index_labels_to_array) +from pandas.core.common import (is_bool_indexer, + _values_from_object, + _asarray_tuplesafe) from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList +import pandas.core.common as com import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings, accessor +from pandas.core import strings from pandas.core.config import get_option @@ -122,23 +121,6 @@ class Index(IndexOpsMixin, PandasObject): Notes ----- An Index instance can **only** contain hashable objects - - Examples - -------- - >>> pd.Index([1, 2, 3]) - Int64Index([1, 2, 3], dtype='int64') - - >>> pd.Index(list('abc')) - Index(['a', 'b', 'c'], dtype='object') - - See Also - --------- - RangeIndex : Index implementing a monotonic integer range - CategoricalIndex : Index of :class:`Categorical` s. - MultiIndex : A multi-level, or hierarchical, Index - IntervalIndex : an Index of :class:`Interval` s. - DatetimeIndex, TimedeltaIndex, PeriodIndex - Int64Index, UInt64Index, Float64Index """ # To hand over control to subclasses _join_precedence = 1 @@ -176,7 +158,7 @@ class Index(IndexOpsMixin, PandasObject): _accessors = frozenset(['str']) # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + str = base.AccessorProperty(strings.StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): @@ -603,6 +585,12 @@ def memory_usage(self, deep=False): return result # ops compat + def tolist(self): + """ + return a list of the Index values + """ + return list(self.values) + @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ @@ -865,7 +853,7 @@ def _formatter_func(self): """ return default_pprint - def _format_data(self, name=None): + def _format_data(self): """ Return the formatted data as a unicode string """ @@ -874,11 +862,9 @@ def _format_data(self, name=None): display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 - if name is None: - name = self.__class__.__name__ - space1 = "\n%s" % (' ' * (len(name) + 1)) - space2 = "\n%s" % (' ' * (len(name) + 2)) + space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) n = len(self) sep = ',' @@ -1004,29 +990,6 @@ def to_series(self, **kwargs): index=self._shallow_copy(), name=self.name) - def to_frame(self, index=True): - """ - Create a DataFrame with a column containing the Index. - - .. versionadded:: 0.21.0 - - Parameters - ---------- - index : boolean, default True - Set the index of the returned DataFrame as the original Index. - - Returns - ------- - DataFrame : a DataFrame containing the original Index data. - """ - - from pandas import DataFrame - result = DataFrame(self._shallow_copy(), columns=[self.name or 0]) - - if index: - result.index = self - return result - def _to_embed(self, keep_tz=False): """ *this is an internal non-public method* @@ -1077,7 +1040,7 @@ def to_datetime(self, dayfirst=False): if self.inferred_type == 'string': from dateutil.parser import parse parser = lambda x: parse(x, dayfirst=dayfirst) - parsed = parsing.try_parse_dates(self.values, parser=parser) + parsed = lib.try_parse_dates(self.values, parser=parser) return DatetimeIndex(parsed) else: return DatetimeIndex(self.values) @@ -1638,6 +1601,9 @@ def is_all_dates(self): return False return is_datetime_array(_ensure_object(self.values)) + def __iter__(self): + return iter(self.values) + def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) @@ -2213,11 +2179,7 @@ def union(self, other): if len(self) == 0: return other._get_consensus_name(self) - # TODO: is_dtype_union_equal is a hack around - # 1. buggy set ops with duplicates (GH #13432) - # 2. CategoricalIndex lacking setops (GH #10186) - # Once those are fixed, this workaround can be removed - if not is_dtype_union_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) @@ -2468,7 +2430,7 @@ def _get_unique_index(self, dropna=False): return self._shallow_copy(values) _index_shared_docs['get_loc'] = """ - Get integer location, slice or boolean mask for requested label. + Get integer location for requested label. Parameters ---------- @@ -2484,26 +2446,19 @@ def _get_unique_index(self, dropna=False): the index at the matching location most satisfy the equation ``abs(index[loc] - key) <= tolerance``. + Tolerance may be a scalar + value, which applies the same tolerance to all values, or + list-like, which applies variable tolerance per element. List-like + includes list, tuple, array, Series, and must be the same size as + the index and its dtype must exactly match the index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Returns ------- - loc : int if unique index, slice if monotonic index, else mask - - Examples - --------- - >>> unique_index = pd.Index(list('abc')) - >>> unique_index.get_loc('b') - 1 - - >>> monotonic_index = pd.Index(list('abbc')) - >>> monotonic_index.get_loc('b') - slice(1, 3, None) - - >>> non_monotonic_index = pd.Index(list('abcb')) - >>> non_monotonic_index.get_loc('b') - array([False, True, False, True], dtype=bool) - """ + loc : int if unique index, possibly slice or mask if not + """ @Appender(_index_shared_docs['get_loc']) def get_loc(self, key, method=None, tolerance=None): @@ -2581,23 +2536,15 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return an Index of values for requested level, equal to the length - of the index. + of the index Parameters ---------- - level : int or str - ``level`` is either the integer position of the level in the - MultiIndex, or the name of the level. + level : int Returns ------- values : Index - ``self``, as there is only one level in the Index. - - See also - --------- - pandas.MultiIndex.get_level_values : get values for a level of a - MultiIndex """ self._validate_index_level(level) @@ -2627,7 +2574,14 @@ def _get_level_values(self, level): matches. The values of the index at the matching locations most satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Examples -------- @@ -2647,13 +2601,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) - - # Treat boolean labels passed to a numeric index as not found. Without - # this fix False and True would be treated as 0 and 1 respectively. - # (GH #16877) - if target.is_boolean() and self.is_numeric(): - return _ensure_platform_int(np.repeat(-1, target.size)) + tolerance = self._convert_tolerance(tolerance, target) pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: @@ -2683,10 +2631,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): 'backfill or nearest reindexing') indexer = self._engine.get_indexer(target._values) + return _ensure_platform_int(indexer) - def _convert_tolerance(self, tolerance): + def _convert_tolerance(self, tolerance, target): # override this method on subclasses + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') return tolerance def _get_fill_indexer(self, target, method, limit=None, tolerance=None): @@ -3167,8 +3120,8 @@ def _join_multi(self, other, how, return_indexers=True): other_is_mi = isinstance(other, MultiIndex) # figure out join names - self_names = _not_none(*self.names) - other_names = _not_none(*other.names) + self_names = [n for n in self.names if n is not None] + other_names = [n for n in other.names if n is not None] overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 @@ -3522,7 +3475,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'left') + else 'right') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') @@ -3611,19 +3564,6 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): ------- start, end : int - Notes - ----- - This method only works if the index is monotonic or unique. - - Examples - --------- - >>> idx = pd.Index(list('abcd')) - >>> idx.slice_locs(start='b', end='c') - (1, 3) - - See Also - -------- - Index.get_loc : Get location for a single label """ inc = (step is None or step >= 0) @@ -3713,7 +3653,7 @@ def drop(self, labels, errors='raise'): ------- dropped : Index """ - labels = _index_labels_to_array(labels) + labels = com._index_labels_to_array(labels) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): @@ -3794,7 +3734,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): def _evaluate_with_datetime_like(self, other, op, opstr): raise TypeError("can only perform ops with datetime like values") - def _evaluate_compare(self, op): + def _evalute_compare(self, op): raise base.AbstractMethodError(self) @classmethod @@ -4129,6 +4069,15 @@ def _ensure_index_from_sequences(sequences, names=None): else: return MultiIndex.from_arrays(sequences, names=names) +def _list_to_ndarray(a): + """Convert list-like to np.ndarray, otherwise leave as-is. + Used for converting tolerance to ndarray in _convert_tolerance. + """ + if isinstance(a, ABCSeries): + return a.values + elif isinstance(a, (list, tuple)): + return np.array(a) + return a def _ensure_index(index_like, copy=False): """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d5b4525e8a1eb..5d40975586e73 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -7,6 +7,7 @@ from pandas import compat from pandas.compat.numpy import function as nv +from pandas.core.tools.timedeltas import to_timedelta import numpy as np from pandas.core.dtypes.common import ( @@ -431,13 +432,12 @@ def asobject(self): from pandas.core.index import Index return Index(self._box_values(self.asi8), name=self.name, dtype=object) - def _convert_tolerance(self, tolerance): - try: - return Timedelta(tolerance).to_timedelta64() - except ValueError: - raise ValueError('tolerance argument for %s must be convertible ' - 'to Timedelta: %r' - % (type(self).__name__, tolerance)) + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(to_timedelta(tolerance, box=False)) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') + return tolerance def _maybe_mask_results(self, result, fill_value=None, convert=None): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 25897bee29845..d16251a7829b9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1423,7 +1423,7 @@ def get_loc(self, key, method=None, tolerance=None): if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) if isinstance(key, datetime): # needed to localize naive datetimes @@ -1447,7 +1447,12 @@ def get_loc(self, key, method=None, tolerance=None): try: stamp = Timestamp(key, tz=self.tz) return Index.get_loc(self, stamp, method, tolerance) - except (KeyError, ValueError): + except KeyError: + raise KeyError(key) + except ValueError as e: + # list-like tolerance size must match target index size + if 'list-like' in str(e): + raise e raise KeyError(key) def _maybe_cast_slice_bound(self, label, side, kind): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 9fc47ad7b773c..1f007b1961e06 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -71,12 +71,21 @@ def _convert_for_op(self, value): return value - def _convert_tolerance(self, tolerance): - try: - return float(tolerance) - except ValueError: - raise ValueError('tolerance argument for %s must be numeric: %r' % - (type(self).__name__, tolerance)) + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') + if not np.issubdtype(tolerance.dtype, np.number): + if tolerance.ndim > 0: + raise ValueError(('tolerance argument for %s must contain ' + 'numeric elements if it is list type') % + (type(self).__name__,)) + else: + raise ValueError(('tolerance argument for %s must be numeric ' + 'if it is a scalar: %r') % + (type(self).__name__, tolerance)) + return tolerance @classmethod def _assert_safe_casting(cls, data, subarr): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b70b4c4e4067c..148ca2725fbdc 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -641,12 +641,17 @@ def to_timestamp(self, freq=None, how='start'): return DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): + if isinstance( + other, (timedelta, np.timedelta64, offsets.Tick, np.ndarray)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) + if isinstance(other, np.ndarray): + nanos = np.vectorize(tslib._delta_to_nanoseconds)(other) + else: + nanos = tslib._delta_to_nanoseconds(other) offset_nanos = tslib._delta_to_nanoseconds(offset) - if nanos % offset_nanos == 0: + check = np.all(nanos % offset_nanos == 0) + if check: return nanos // offset_nanos elif isinstance(other, offsets.DateOffset): freqstr = other.rule_code @@ -782,7 +787,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): target = target.asi8 if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, target) return Index.get_indexer(self._int64index, target, method, limit, tolerance) @@ -825,7 +830,8 @@ def get_loc(self, key, method=None, tolerance=None): try: ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, + np.asarray(key)) return self._int64index.get_loc(ordinal, method, tolerance) except KeyError: @@ -908,8 +914,12 @@ def _get_string_slice(self, key): return slice(self.searchsorted(t1.ordinal, side='left'), self.searchsorted(t2.ordinal, side='right')) - def _convert_tolerance(self, tolerance): - tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance) + def _convert_tolerance(self, tolerance, target): + tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, + target) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') return self._maybe_convert_timedelta(tolerance) def insert(self, loc, item): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 89757c2bf40da..6e08c32f30dcd 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -699,7 +699,7 @@ def get_loc(self, key, method=None, tolerance=None): if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) if _is_convertible_to_td(key): key = Timedelta(key) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index d5132826bb93f..2791b513b88f3 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -83,8 +83,12 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, unit=unit, box=box, errors=errors, name=arg.name) - elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1: - return _convert_listlike(arg, unit=unit, box=box, errors=errors) + elif is_list_like(arg) and getattr(arg, 'ndim', 1) <= 1: + if getattr(arg, 'ndim', 1) == 0: + # extract array scalar and process below + arg = arg.item() + else: + return _convert_listlike(arg, unit=unit, box=box, errors=errors) elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, timedelta, list, tuple, ' '1-d array, or Series') diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 1a16e4ef48b64..f850b8f2ee178 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1935,9 +1935,13 @@ def test_reindex_methods(self): actual = df.reindex_like(df, method=method, tolerance=0) assert_frame_equal(df, actual) + actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + assert_frame_equal(df, actual) actual = df.reindex(target, method=method, tolerance=1) assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + assert_frame_equal(expected, actual) e2 = expected[::-1] actual = df.reindex(target[::-1], method=method) @@ -1958,6 +1962,11 @@ def test_reindex_methods(self): actual = df.reindex(target, method='nearest', tolerance=0.2) assert_frame_equal(expected, actual) + expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method='nearest', + tolerance=[0.5, 0.01, 0.4, 0.1]) + assert_frame_equal(expected, actual) + def test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 47f53f53cfd02..8d9ac59cf9883 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -41,10 +41,17 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-01T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc('2000-01-01', method='nearest', + tolerance=[pd.Timedelta('1day').to_timedelta64(), + pd.Timedelta('1day').to_timedelta64()]) assert idx.get_loc('2000', method='nearest') == slice(0, 3) assert idx.get_loc('2000-01', method='nearest') == slice(0, 3) @@ -93,6 +100,19 @@ def test_get_indexer(self): idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), np.array([0, -1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + pd.Timedelta('1 hour').to_timedelta64(), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + 'foo', ] + with pytest.raises( + ValueError, match='abbreviation w/o a number'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 51f7d13cb0638..ae500e66359b4 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -9,6 +9,7 @@ from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, offsets, compat) +from pandas.core.indexes.period import IncompatibleFrequency from ..datetimelike import DatetimeLike @@ -83,7 +84,8 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' @@ -91,6 +93,12 @@ def test_get_loc(self): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + with pytest.raises( + ValueError, + match='list-like tolerance size must match target index size'): + idx.get_loc('2000-01-10', method='nearest', + tolerance=[pd.Timedelta('1 day').to_timedelta64(), + pd.Timedelta('1 day').to_timedelta64()]) def test_where(self): i = self.create_index() @@ -158,6 +166,20 @@ def test_get_indexer(self): tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', tolerance='1 day'), np.array([0, 1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + np.timedelta64(1, 'D'), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + np.timedelta64(1, 'M'), ] + with pytest.raises( + IncompatibleFrequency, + match='Input has different freq from'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) def test_repeat(self): # GH10183 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 81f113d58d680..307cda7f2d1cb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1075,40 +1075,58 @@ def test_get_indexer_invalid(self): with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], limit=1) - def test_get_indexer_nearest(self): + @pytest.mark.parametrize( + 'method, tolerance, indexer, expected', + [ + ('pad', None, [0, 5, 9], [0, 5, 9]), + ('backfill', None, [0, 5, 9], [0, 5, 9]), + ('nearest', None, [0, 5, 9], [0, 5, 9]), + ('pad', 0, [0, 5, 9], [0, 5, 9]), + ('backfill', 0, [0, 5, 9], [0, 5, 9]), + ('nearest', 0, [0, 5, 9], [0, 5, 9]), + + ('pad', None, [0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', None, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', None, [0.2, 1.8, 8.5], [0, 2, 9]), + ('pad', 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', 1, [0.2, 1.8, 8.5], [0, 2, 9]), + + ('pad', 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ('backfill', 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ('nearest', 0.2, [0.2, 1.8, 8.5], [0, 2, -1])]) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): idx = Index(np.arange(10)) - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], - [0, 2, 9]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) - - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=1) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = idx.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) + + @pytest.mark.parametrize('listtype', [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + 'tolerance, expected', + list(zip([[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], + [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], + [-1, 2, 9]]))) + def test_get_indexer_nearest_listlike_tolerance(self, tolerance, + expected, listtype): + idx = Index(np.arange(10)) - for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], - [0, 2, -1]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=0.2) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = idx.get_indexer([0.2, 1.8, 8.5], method='nearest', + tolerance=listtype(tolerance)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) + def test_get_indexer_nearest_error(self): + idx = Index(np.arange(10)) with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_indexer([1, 0], method='nearest', + tolerance=[1, 2, 3]) + def test_get_indexer_nearest_decreasing(self): idx = Index(np.arange(10))[::-1] @@ -1141,6 +1159,10 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + with pytest.raises(TypeError): + idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', + tolerance=[2, 2, 2, 2]) + def test_get_indexer_numeric_index_boolean_target(self): # GH 16877 numeric_idx = pd.Index(range(4)) @@ -1172,6 +1194,8 @@ def test_get_loc(self): idx.get_loc(1.1, 'nearest', tolerance='invalid') with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): idx.get_loc(1.1, tolerance=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_loc(1.1, 'nearest', tolerance=[1, 1]) idx = pd.Index(['a', 'c']) with pytest.raises(TypeError): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index dc38b0a2b1fb7..a96c677852339 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -354,6 +354,14 @@ def test_get_loc(self): with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') + with pytest.raises(ValueError, match='must contain numeric elements'): + idx.get_loc(1.4, method='nearest', tolerance=np.array(['foo'])) + + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc(1.4, method='nearest', tolerance=np.array([1, 2])) + def test_get_loc_na(self): idx = Float64Index([np.nan, 1, 2]) assert idx.get_loc(1) == 1 diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 0b3bd0b03bccf..0a09199eca9d5 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -60,9 +60,17 @@ def test_get_loc(self): assert idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc(idx[1], method='nearest', tolerance='foo') + with pytest.raises( + ValueError, + match='tolerance size must match'): + idx.get_loc(idx[1], method='nearest', + tolerance=[Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64()]) + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: assert idx.get_loc('1 day 1 hour', method) == loc diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 75ae47ed2fdc1..d141b378fe214 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2117,11 +2117,19 @@ def test_reindex_nearest(self): actual = s.reindex_like(actual, method='nearest', tolerance=1) assert_series_equal(expected, actual) + actual = s.reindex_like(actual, method='nearest', + tolerance=[1, 2, 3, 4]) + assert_series_equal(expected, actual) actual = s.reindex(target, method='nearest', tolerance=0.2) expected = Series([0, 1, np.nan, 2], target) assert_series_equal(expected, actual) + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = Series([0, np.nan, np.nan, 2], target) + assert_series_equal(expected, actual) + def test_reindex_backfill(self): pass diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index edbac8f09241b..37a287af71451 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -414,6 +414,11 @@ def test_reindex_nearest(self): expected = pd.Series([0, 1, np.nan, 2], target).to_sparse() tm.assert_sp_series_equal(expected, actual) + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = pd.Series([0, np.nan, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + def tests_indexing_with_sparse(self): # GH 13985 From 2b549b1754d6861abc4f926aa0b5a0a6b9a8429c Mon Sep 17 00:00:00 2001 From: Brian Tu Date: Sat, 14 Oct 2017 13:03:46 -0400 Subject: [PATCH 4/5] Add back stuff accidentally removed on rebase --- pandas/core/indexes/base.py | 135 +++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 32 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1a937266b72c7..a995fc10a6674 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7,6 +7,7 @@ algos as libalgos, join as libjoin, Timestamp, Timedelta, ) from pandas._libs.lib import is_datetime_array +from pandas._libs.tslibs import parsing from pandas.compat import range, u from pandas.compat.numpy import function as nv @@ -27,6 +28,7 @@ is_integer, is_float, is_dtype_equal, + is_dtype_union_equal, is_object_dtype, is_categorical_dtype, is_interval_dtype, @@ -55,7 +57,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings +from pandas.core import strings, accessor from pandas.core.config import get_option @@ -120,6 +122,23 @@ class Index(IndexOpsMixin, PandasObject): Notes ----- An Index instance can **only** contain hashable objects + + Examples + -------- + >>> pd.Index([1, 2, 3]) + Int64Index([1, 2, 3], dtype='int64') + + >>> pd.Index(list('abc')) + Index(['a', 'b', 'c'], dtype='object') + + See Also + --------- + RangeIndex : Index implementing a monotonic integer range + CategoricalIndex : Index of :class:`Categorical` s. + MultiIndex : A multi-level, or hierarchical, Index + IntervalIndex : an Index of :class:`Interval` s. + DatetimeIndex, TimedeltaIndex, PeriodIndex + Int64Index, UInt64Index, Float64Index """ # To hand over control to subclasses _join_precedence = 1 @@ -157,7 +176,7 @@ class Index(IndexOpsMixin, PandasObject): _accessors = frozenset(['str']) # String Methods - str = base.AccessorProperty(strings.StringMethods) + str = accessor.AccessorProperty(strings.StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): @@ -584,12 +603,6 @@ def memory_usage(self, deep=False): return result # ops compat - def tolist(self): - """ - return a list of the Index values - """ - return list(self.values) - @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ @@ -852,7 +865,7 @@ def _formatter_func(self): """ return default_pprint - def _format_data(self): + def _format_data(self, name=None): """ Return the formatted data as a unicode string """ @@ -861,9 +874,11 @@ def _format_data(self): display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 + if name is None: + name = self.__class__.__name__ - space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + space1 = "\n%s" % (' ' * (len(name) + 1)) + space2 = "\n%s" % (' ' * (len(name) + 2)) n = len(self) sep = ',' @@ -989,6 +1004,29 @@ def to_series(self, **kwargs): index=self._shallow_copy(), name=self.name) + def to_frame(self, index=True): + """ + Create a DataFrame with a column containing the Index. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + index : boolean, default True + Set the index of the returned DataFrame as the original Index. + + Returns + ------- + DataFrame : a DataFrame containing the original Index data. + """ + + from pandas import DataFrame + result = DataFrame(self._shallow_copy(), columns=[self.name or 0]) + + if index: + result.index = self + return result + def _to_embed(self, keep_tz=False): """ *this is an internal non-public method* @@ -1039,7 +1077,7 @@ def to_datetime(self, dayfirst=False): if self.inferred_type == 'string': from dateutil.parser import parse parser = lambda x: parse(x, dayfirst=dayfirst) - parsed = lib.try_parse_dates(self.values, parser=parser) + parsed = parsing.try_parse_dates(self.values, parser=parser) return DatetimeIndex(parsed) else: return DatetimeIndex(self.values) @@ -1600,9 +1638,6 @@ def is_all_dates(self): return False return is_datetime_array(_ensure_object(self.values)) - def __iter__(self): - return iter(self.values) - def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) @@ -2178,7 +2213,11 @@ def union(self, other): if len(self) == 0: return other._get_consensus_name(self) - if not is_dtype_equal(self.dtype, other.dtype): + # TODO: is_dtype_union_equal is a hack around + # 1. buggy set ops with duplicates (GH #13432) + # 2. CategoricalIndex lacking setops (GH #10186) + # Once those are fixed, this workaround can be removed + if not is_dtype_union_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) @@ -2429,7 +2468,7 @@ def _get_unique_index(self, dropna=False): return self._shallow_copy(values) _index_shared_docs['get_loc'] = """ - Get integer location for requested label. + Get integer location, slice or boolean mask for requested label. Parameters ---------- @@ -2456,8 +2495,22 @@ def _get_unique_index(self, dropna=False): Returns ------- - loc : int if unique index, possibly slice or mask if not - """ + loc : int if unique index, slice if monotonic index, else mask + + Examples + --------- + >>> unique_index = pd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True], dtype=bool) + """ @Appender(_index_shared_docs['get_loc']) def get_loc(self, key, method=None, tolerance=None): @@ -2535,15 +2588,23 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return an Index of values for requested level, equal to the length - of the index + of the index. Parameters ---------- - level : int + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``self``, as there is only one level in the Index. + + See also + --------- + pandas.MultiIndex.get_level_values : get values for a level of a + MultiIndex """ self._validate_index_level(level) @@ -2602,6 +2663,12 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance, target) + # Treat boolean labels passed to a numeric index as not found. Without + # this fix False and True would be treated as 0 and 1 respectively. + # (GH #16877) + if target.is_boolean() and self.is_numeric(): + return _ensure_platform_int(np.repeat(-1, target.size)) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, @@ -3474,7 +3541,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') + else 'left') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') @@ -3563,6 +3630,19 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): ------- start, end : int + Notes + ----- + This method only works if the index is monotonic or unique. + + Examples + --------- + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_locs(start='b', end='c') + (1, 3) + + See Also + -------- + Index.get_loc : Get location for a single label """ inc = (step is None or step >= 0) @@ -3733,7 +3813,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): def _evaluate_with_datetime_like(self, other, op, opstr): raise TypeError("can only perform ops with datetime like values") - def _evalute_compare(self, op): + def _evaluate_compare(self, op): raise base.AbstractMethodError(self) @classmethod @@ -4068,15 +4148,6 @@ def _ensure_index_from_sequences(sequences, names=None): else: return MultiIndex.from_arrays(sequences, names=names) -def _list_to_ndarray(a): - """Convert list-like to np.ndarray, otherwise leave as-is. - Used for converting tolerance to ndarray in _convert_tolerance. - """ - if isinstance(a, ABCSeries): - return a.values - elif isinstance(a, (list, tuple)): - return np.array(a) - return a def _ensure_index(index_like, copy=False): """ From 7e7051a3e66b2ab1ae6e75fcb9194d83c247ed38 Mon Sep 17 00:00:00 2001 From: Brian Tu Date: Sat, 14 Oct 2017 15:09:27 -0400 Subject: [PATCH 5/5] Split timedelta condition for array scalar into separate if statement --- pandas/core/tools/timedeltas.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 2791b513b88f3..f61d9f90d6ca2 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -83,12 +83,11 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, unit=unit, box=box, errors=errors, name=arg.name) - elif is_list_like(arg) and getattr(arg, 'ndim', 1) <= 1: - if getattr(arg, 'ndim', 1) == 0: - # extract array scalar and process below - arg = arg.item() - else: - return _convert_listlike(arg, unit=unit, box=box, errors=errors) + elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 0: + # extract array scalar and process below + arg = arg.item() + elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1: + return _convert_listlike(arg, unit=unit, box=box, errors=errors) elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, timedelta, list, tuple, ' '1-d array, or Series')