Skip to content

Commit 7e71a44

Browse files
markroth8 authored and jreback committed
ENH: Partial string matching for timestamps with multiindex
closes #10331 closes #12530
1 parent e55875e commit 7e71a44

File tree

4 files changed

+146
-10
lines changed

4 files changed

+146
-10
lines changed

doc/source/timeseries.rst

+17
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,23 @@ We are stopping on the included end-point as it is part of the index
422422
423423
dft.loc['2013-1-15 12:30:00']
424424
425+
.. versionadded:: 0.18.1
426+
427+
DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiIndex``. For example:
428+
429+
.. ipython:: python
430+
431+
dft2 = pd.DataFrame(np.random.randn(20, 1),
432+
columns=['A'],
433+
index=pd.MultiIndex.from_product([pd.date_range('20130101',
434+
periods=10,
435+
freq='12H'),
436+
['a', 'b']]))
437+
dft2
438+
dft2.loc['2013-01-05']
439+
idx = pd.IndexSlice
440+
dft2 = dft2.swaplevel(0, 1).sort_index()
441+
dft2.loc[idx[:, '2013-01-05'], :]
425442
426443
Datetime Indexing
427444
~~~~~~~~~~~~~~~~~

doc/source/whatsnew/v0.18.1.txt

+20-6
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,26 @@ Enhancements
3030
~~~~~~~~~~~~
3131

3232

33-
34-
35-
36-
37-
38-
33+
.. _whatsnew_0181.partial_string_indexing:
34+
35+
Partial string indexing on ``DatetimeIndex`` when part of a ``MultiIndex``
36+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
37+
38+
Partial string indexing now matches on ``DatetimeIndex`` when part of a ``MultiIndex`` (:issue:`10331`)
39+
40+
.. ipython:: python
41+
42+
dft2 = pd.DataFrame(np.random.randn(20, 1),
43+
columns=['A'],
44+
index=pd.MultiIndex.from_product([pd.date_range('20130101',
45+
periods=10,
46+
freq='12H'),
47+
['a', 'b']]))
48+
dft2
49+
dft2.loc['2013-01-05']
50+
idx = pd.IndexSlice
51+
dft2 = dft2.swaplevel(0, 1).sort_index()
52+
dft2.loc[idx[:, '2013-01-05'], :]
3953

4054
.. _whatsnew_0181.other:
4155

pandas/core/indexing.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -1392,8 +1392,33 @@ def error():
13921392

13931393
return True
13941394

1395+
def _get_partial_string_timestamp_match_key(self, key, labels):
1396+
"""Translate any partial string timestamp matches in key, returning the
1397+
new key (GH 10331)"""
1398+
if isinstance(labels, MultiIndex):
1399+
if isinstance(key, compat.string_types) and \
1400+
labels.levels[0].is_all_dates:
1401+
# Convert key '2016-01-01' to
1402+
# ('2016-01-01'[, slice(None, None, None)]+)
1403+
key = tuple([key] + [slice(None)] * (len(labels.levels) - 1))
1404+
1405+
if isinstance(key, tuple):
1406+
# Convert (..., '2016-01-01', ...) in tuple to
1407+
# (..., slice('2016-01-01', '2016-01-01', None), ...)
1408+
new_key = []
1409+
for i, component in enumerate(key):
1410+
if isinstance(component, compat.string_types) and \
1411+
labels.levels[i].is_all_dates:
1412+
new_key.append(slice(component, component, None))
1413+
else:
1414+
new_key.append(component)
1415+
key = tuple(new_key)
1416+
1417+
return key
1418+
13951419
def _getitem_axis(self, key, axis=0):
13961420
labels = self.obj._get_axis(axis)
1421+
key = self._get_partial_string_timestamp_match_key(key, labels)
13971422

13981423
if isinstance(key, slice):
13991424
self._has_valid_type(key, axis)
@@ -1718,11 +1743,12 @@ def convert_to_index_sliceable(obj, key):
17181743
if key in obj._data.items:
17191744
return None
17201745

1721-
# we need a timelike key here
1746+
# We might have a datetimelike string that we can translate to a
1747+
# slice here via partial string indexing
17221748
if idx.is_all_dates:
17231749
try:
17241750
return idx._get_string_slice(key)
1725-
except:
1751+
except ValueError:
17261752
return None
17271753

17281754
return None

pandas/tests/indexes/test_multi.py

+81-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414

1515
import numpy as np
1616

17-
from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
18-
assert_copy)
17+
from pandas.util.testing import (assert_almost_equal, assertRaises,
18+
assertRaisesRegexp, assert_copy)
1919

2020
import pandas.util.testing as tm
2121

@@ -1970,3 +1970,82 @@ def test_index_name_retained(self):
19701970
def test_equals_operator(self):
    # GH9785: comparing the index with itself via ``==`` must be truthy
    # under ``.all()``.
    self_comparison = self.index == self.index
    self.assertTrue(self_comparison.all())
1973+
1974+
def test_partial_string_timestamp_multiindex(self):
    # GH10331
    stamps = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
    letters = ['a', 'b', 'c']
    df = pd.DataFrame({'c1': range(0, 15)},
                      index=pd.MultiIndex.from_product([stamps, letters]))
    idx = pd.IndexSlice

    # Layout of ``df``:
    #
    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14

    # Same frame with the levels swapped and the index re-sorted.
    df_swap = df.swaplevel(0, 1).sort_index()

    # partial string matching on a single index
    result = df_swap.loc['a'].loc['2016-01-01']
    expected = df.loc[idx[:, 'a'], :].iloc[0:2]
    expected.index = expected.index.droplevel(1)
    tm.assert_frame_equal(result, expected)

    # indexing with IndexSlice
    tm.assert_frame_equal(df.loc[idx['2016-01-01':'2016-02-01', :], :], df)

    # match on secondary index
    tm.assert_frame_equal(
        df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :],
        df_swap.iloc[[0, 1, 5, 6, 10, 11]])

    # Even though this syntax works on a single index, it is somewhat
    # ambiguous and we don't want to extend this behavior to
    # multi-indexes: it would amount to selecting a scalar from a column.
    with assertRaises(KeyError):
        df['2016-01-01']

    # partial string match on year only
    tm.assert_frame_equal(df.loc['2016'], df)

    # partial string match on date
    tm.assert_frame_equal(df.loc['2016-01-01'], df.iloc[0:6])

    # partial string match on date and hour, from middle
    tm.assert_frame_equal(df.loc['2016-01-02 12'], df.iloc[9:12])

    # partial string match on secondary index
    tm.assert_frame_equal(
        df_swap.loc[idx[:, '2016-01-02'], :],
        df_swap.iloc[[2, 3, 7, 8, 12, 13]])

    # tuple selector with partial string match on date
    tm.assert_frame_equal(df.loc[('2016-01-01', 'a'), :], df.iloc[[0, 3]])

    # Slicing date on first level should break (of course)
    with assertRaises(KeyError):
        df_swap.loc['2016-01-01']

0 commit comments

Comments
 (0)