Skip to content

Remove offset/DTI caching (disabled since 0.14) #23118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ Other API Changes
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`)

.. _whatsnew_0240.deprecations:

Expand Down
11 changes: 0 additions & 11 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,6 @@ class ApplyTypeError(TypeError):
pass


# TODO: unused. remove?
class CacheableOffset(object):
    """
    Mixin whose only content is the class attribute ``_cacheable = True``.

    ``_BaseOffset._should_cache`` reads ``_cacheable``, so inheriting from
    this class is how an offset would opt in to range caching.
    """
    _cacheable = True


# ---------------------------------------------------------------------
# Base Classes

Expand All @@ -296,8 +291,6 @@ class _BaseOffset(object):
and will (after pickle errors are resolved) go into a cdef class.
"""
_typ = "dateoffset"
_normalize_cache = True
_cacheable = False
_day_opt = None
_attributes = frozenset(['n', 'normalize'])

Expand Down Expand Up @@ -386,10 +379,6 @@ class _BaseOffset(object):
# that allows us to use methods that can go in a `cdef class`
return self * 1

# TODO: this is never true. fix it or get rid of it
def _should_cache(self):
return self.isAnchored() and self._cacheable

def __repr__(self):
className = getattr(self, '_outputName', type(self).__name__)

Expand Down
49 changes: 17 additions & 32 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
resolution as libresolution)

from pandas.util._decorators import cache_readonly
from pandas.errors import PerformanceWarning, AbstractMethodError
from pandas.errors import PerformanceWarning
from pandas import compat

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -268,27 +268,22 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
end, end.tz, start.tz, freq, tz
)
if freq is not None:
if cls._use_cached_range(freq, _normalized, start, end):
# Currently always False; never hit
# Should be reimplemented as a part of GH#17914
index = cls._cached_range(start, end, periods=periods,
freq=freq)
else:
index = _generate_regular_range(cls, start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
arr = conversion.tz_localize_to_utc(
ensure_int64(index.values),
tz, ambiguous=ambiguous)

index = cls(arr)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
# TODO: consider re-implementing _cached_range; GH#17914
index = _generate_regular_range(cls, start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
arr = conversion.tz_localize_to_utc(
ensure_int64(index.values),
tz, ambiguous=ambiguous)

index = cls(arr)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
else:
# Create a linearly spaced date_range in local time
arr = np.linspace(start.value, end.value, periods)
Expand All @@ -303,16 +298,6 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

return cls._simple_new(index.values, freq=freq, tz=tz)

@classmethod
def _use_cached_range(cls, freq, _normalized, start, end):
# DatetimeArray is mutable, so is not cached
return False

@classmethod
def _cached_range(cls, start=None, end=None,
periods=None, freq=None, **kwargs):
raise AbstractMethodError(cls)

# -----------------------------------------------------------------
# Descriptive Properties

Expand Down
112 changes: 11 additions & 101 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin,
wrap_field_accessor, wrap_array_method)
from pandas.tseries.offsets import (
generate_range, CDay, prefix_mapping)
CDay, prefix_mapping)

from pandas.core.tools.timedeltas import to_timedelta
from pandas.util._decorators import Appender, cache_readonly, Substitution
Expand Down Expand Up @@ -326,13 +326,6 @@ def _generate_range(cls, start, end, periods, name=None, freq=None,
out.name = name
return out

@classmethod
def _use_cached_range(cls, freq, _normalized, start, end):
# Note: This always returns False
return (freq._should_cache() and
not (freq._normalize_cache and not _normalized) and
_naive_in_cache_range(start, end))

def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
if self._has_same_tz(value):
Expand Down Expand Up @@ -410,71 +403,6 @@ def nbytes(self):
# for TZ-aware
return self._ndarray_values.nbytes

@classmethod
def _cached_range(cls, start=None, end=None, periods=None, freq=None,
                  name=None):
    """
    Build a range by slicing a per-frequency cache of precomputed dates.

    The first request for a given ``freq`` generates every date between
    ``_CACHE_START`` and ``_CACHE_END`` and stores the resulting index in
    ``_daterange_cache``; later requests only slice that index.

    Parameters
    ----------
    start, end : Timestamp-convertible, optional
        Range bounds; at least one is required.  A bound that is not on
        the offset is rolled inward (``start`` forward, ``end`` backward).
    periods : int, optional
        Number of entries; required when only one bound is given.
    freq : DateOffset
        Required; used as the cache key.
    name : object, optional
        Name assigned to the returned slice.

    Raises
    ------
    TypeError
        If neither bound is given, if one bound is given without
        ``periods``, or if ``freq`` is None.
    AssertionError
        If the single supplied bound is not a ``Timestamp`` instance
        after conversion.
    """
    if start is None and end is None:
        # I somewhat believe this should never be raised externally
        raise TypeError('Must specify either start or end.')
    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)
    if (start is None or end is None) and periods is None:
        raise TypeError(
            'Must either specify period or provide both start and end.')

    if freq is None:
        # This can't happen with external-facing code
        raise TypeError('Must provide freq.')

    if freq in _daterange_cache:
        cached = _daterange_cache[freq]
    else:
        # First use of this freq: materialise the whole cache window once.
        dates = generate_range(offset=freq, start=_CACHE_START,
                               end=_CACHE_END)
        values = tools.to_datetime(list(dates), box=False)

        cached = DatetimeIndex._simple_new(values)
        cached.freq = freq
        cached = cached.tz_localize(None)
        cached.name = None
        _daterange_cache[freq] = cached

    # NOTE(review): the positions computed from ``periods`` below are not
    # bounds-checked; a negative ``first`` or a ``stop`` past the end of
    # the cache would be interpreted by ordinary slicing rules -- confirm
    # callers never request a span outside the cache window.
    if start is None:
        if not isinstance(end, Timestamp):
            raise AssertionError('end must be an instance of Timestamp')

        end = freq.rollback(end)

        stop = cached.get_loc(end) + 1
        first = stop - periods
    elif end is None:
        if not isinstance(start, Timestamp):
            raise AssertionError('start must be an instance of Timestamp')

        start = freq.rollforward(start)

        first = cached.get_loc(start)
        stop = first + periods
    else:
        if not freq.onOffset(start):
            start = freq.rollforward(start)

        if not freq.onOffset(end):
            end = freq.rollback(end)

        first = cached.get_loc(start)
        stop = cached.get_loc(end) + 1

    result = cached[first:stop]
    result.name = name
    result.freq = freq

    return result

def _mpl_repr(self):
# how to represent ourselves to matplotlib
return libts.ints_to_pydatetime(self.asi8, self.tz)
Expand Down Expand Up @@ -832,22 +760,19 @@ def _fast_union(self, other):
else:
left, right = other, self

left_start, left_end = left[0], left[-1]
left_end = left[-1]
right_end = right[-1]

if not self.freq._should_cache():
# concatenate dates
if left_end < right_end:
loc = right.searchsorted(left_end, side='right')
right_chunk = right.values[loc:]
dates = _concat._concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
else:
return left
# TODO: consider re-implementing freq._should_cache for fastpath

# concatenate dates
if left_end < right_end:
loc = right.searchsorted(left_end, side='right')
right_chunk = right.values[loc:]
dates = _concat._concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
else:
return type(self)(start=left_start,
end=max(left_end, right_end),
freq=left.freq)
return left

def _wrap_union_result(self, other, result):
name = self.name if self.name == other.name else None
Expand Down Expand Up @@ -1724,21 +1649,6 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
closed=closed, **kwargs)


_CACHE_START = Timestamp(datetime(1950, 1, 1))
_CACHE_END = Timestamp(datetime(2030, 1, 1))

_daterange_cache = {}


def _naive_in_cache_range(start, end):
if start is None or end is None:
return False
else:
if start.tzinfo is not None or end.tzinfo is not None:
return False
return start > _CACHE_START and end < _CACHE_END


def _time_to_micros(time):
seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
return 1000000 * seconds + time.microsecond
40 changes: 0 additions & 40 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,23 +616,6 @@ def test_naive_aware_conflicts(self):
with tm.assert_raises_regex(TypeError, msg):
aware.join(naive)

def test_cached_range(self):
    # Each supported combination of bounds/periods must build cleanly.
    DatetimeIndex._cached_range(START, END, freq=BDay())
    DatetimeIndex._cached_range(START, periods=20, freq=BDay())
    DatetimeIndex._cached_range(end=START, periods=20, freq=BDay())

    # freq is mandatory
    with tm.assert_raises_regex(TypeError, "freq"):
        DatetimeIndex._cached_range(START, END)

    # a single bound needs periods
    with tm.assert_raises_regex(TypeError, "specify period"):
        DatetimeIndex._cached_range(START, freq=BDay())

    with tm.assert_raises_regex(TypeError, "specify period"):
        DatetimeIndex._cached_range(end=END, freq=BDay())

    # at least one bound is required
    with tm.assert_raises_regex(TypeError, "start or end"):
        DatetimeIndex._cached_range(periods=20, freq=BDay())

def test_misc(self):
end = datetime(2009, 5, 13)
dr = bdate_range(end=end, periods=20)
Expand Down Expand Up @@ -693,29 +676,6 @@ def test_constructor(self):
with tm.assert_raises_regex(TypeError, msg):
bdate_range('2011-1-1', '2012-1-1', 'C')

def test_cached_range(self):
    # Each supported combination of bounds/periods must build cleanly.
    DatetimeIndex._cached_range(START, END, freq=CDay())
    DatetimeIndex._cached_range(START, periods=20,
                                freq=CDay())
    DatetimeIndex._cached_range(end=START, periods=20,
                                freq=CDay())

    # freq is mandatory
    with tm.assert_raises_regex(TypeError, "freq"):
        DatetimeIndex._cached_range(START, END)

    # a single bound needs periods
    with tm.assert_raises_regex(TypeError, "specify period"):
        DatetimeIndex._cached_range(START, freq=CDay())

    with tm.assert_raises_regex(TypeError, "specify period"):
        DatetimeIndex._cached_range(end=END, freq=CDay())

    # at least one bound is required
    with tm.assert_raises_regex(TypeError, "start or end"):
        DatetimeIndex._cached_range(periods=20, freq=CDay())

def test_misc(self):
end = datetime(2009, 5, 13)
dr = bdate_range(end=end, periods=20, freq='C')
Expand Down
Loading