diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index c80195af413f7..3097bfa21f9e1 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -82,7 +82,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7b73a92b18a..a524de3002402 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -243,6 +243,9 @@ def _outer_indexer(self, left, right): _infer_as_myclass = False _engine_type = libindex.ObjectEngine + # whether we support partial string indexing. Overridden + # in DatetimeIndex and PeriodIndex + _supports_partial_string_indexing = False _accessors = {"str"} diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d6f0008a2646f..c01acbeab1473 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -238,6 +238,7 @@ def _join_i8_wrapper(joinf, **kwargs): ) _engine_type = libindex.DatetimeEngine + _supports_partial_string_indexing = True _tz = None _freq = None diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 19fe1eb897f19..f6b3d1076043e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -173,6 +173,7 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): _data = None _engine_type = libindex.PeriodEngine + _supports_partial_string_indexing = True # ------------------------------------------------------------------------ # Index Constructors diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index df89dbe6db6dc..e308ae03730b3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1704,6 +1704,11 @@ def _is_scalar_access(self, key: Tuple): if isinstance(ax, MultiIndex): return False + if isinstance(k, str) and ax._supports_partial_string_indexing: + # partial string indexing, df.loc['2000', 'A'] + # should not be considered scalar + return False + if not ax.is_unique: return False @@ -1719,7 +1724,10 @@ def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" if isinstance(labels, MultiIndex): - if isinstance(key, str) and labels.levels[0].is_all_dates: + if ( + isinstance(key, str) + and labels.levels[0]._supports_partial_string_indexing + ): # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) @@ -1729,7 +1737,10 @@ def _get_partial_string_timestamp_match_key(self, key, labels): # (..., slice('2016-01-01', '2016-01-01', None), ...) new_key = [] for i, component in enumerate(key): - if isinstance(component, str) and labels.levels[i].is_all_dates: + if ( + isinstance(component, str) + and labels.levels[i]._supports_partial_string_indexing + ): new_key.append(slice(component, component, None)) else: new_key.append(component) @@ -2334,7 +2345,7 @@ def convert_to_index_sliceable(obj, key): # We might have a datetimelike string that we can translate to a # slice here via partial string indexing - if idx.is_all_dates: + if idx._supports_partial_string_indexing: try: return idx._get_string_slice(key) except (KeyError, ValueError, NotImplementedError): diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 3095bf9657277..5660fa5ffed80 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -468,3 +468,14 @@ def test_getitem_with_datestring_with_UTC_offset(self, start, end): with pytest.raises(ValueError, match="The index must be timezone"): df = df.tz_localize(None) df[start:end] + + def test_slice_reduce_to_series(self): + # GH 27516 + df = pd.DataFrame( + {"A": range(24)}, index=pd.date_range("2000", periods=24, freq="M") + ) + expected = pd.Series( + range(12), index=pd.date_range("2000", periods=12, freq="M"), name="A" + ) + result = df.loc["2000", "A"] + tm.assert_series_equal(result, expected)