diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 038e4afdbd767..7f599635fcd5c 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -821,14 +821,20 @@ def apply(
 
         # This calls DataSplitter.__iter__
         zipped = zip(group_keys, splitter)
         for key, group in zipped:
             object.__setattr__(group, "name", key)
 
             # group might be modified
             group_axes = group.axes
-            res = f(group)
+            # GH 47350: f may raise on the empty groups that resampling
+            # inserts for missing bins (e.g. Series.idxmax on an empty
+            # group); record a missing result instead of raising.
+            try:
+                res = f(group)
+            except (ValueError, AttributeError):
+                res = None
             if not mutated and not _is_indexed_like(res, group_axes, axis):
                 mutated = True
             result_values.append(res)
 
     # getattr pattern for __name__ is needed for functools.partial objects
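A minimal sketch of the behavior this hunk targets, assuming a build with the patch applied; the column name `x` and the dates are illustrative only:

```python
import pandas as pd

# Daily data with 2022-01-03 missing: resampling creates an empty bin for it.
dates = pd.DatetimeIndex(["2022-01-01", "2022-01-02", "2022-01-04"])
df = pd.DataFrame({"x": [0, 1, 2]}, index=dates)

# Unpatched, idxmax raises ValueError on the empty bin; with the try/except
# above, the empty bin instead contributes a missing value (NaT).
print(df.resample("D")["x"].idxmax())
```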
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 7e2a9184f04d9..3e48075962fe2 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -45,8 +45,8 @@
     tz_compare,
 )
 from pandas._typing import (
+    AnyArrayLike,
     ArrayLike,
-    Axes,
     Dtype,
     DtypeObj,
     F,
@@ -261,10 +261,6 @@ def _new_Index(cls, d):
             # GH#23752 "labels" kwarg has been replaced with "codes"
             d["codes"] = d.pop("labels")
 
-        # Since this was a valid MultiIndex at pickle-time, we don't need to
-        # check validty at un-pickle time.
-        d["verify_integrity"] = False
-
     elif "dtype" not in d and "data" in d:
         # Prevent Index.__new__ from conducting inference;
         #  "data" key not in RangeIndex
@@ -277,9 +273,8 @@ def _new_Index(cls, d):
 
 class Index(IndexOpsMixin, PandasObject):
     """
-    Immutable sequence used for indexing and alignment.
-
-    The basic object storing axis labels for all pandas objects.
+    Immutable sequence used for indexing and alignment. The basic object
+    storing axis labels for all pandas objects.
 
     Parameters
     ----------
@@ -2297,7 +2292,8 @@ def is_monotonic(self) -> bool:
     @property
     def is_monotonic_increasing(self) -> bool:
         """
-        Return a boolean if the values are equal or increasing.
+        Return whether the index is monotonic increasing (only equal or
+        increasing values).
 
         Examples
         --------
@@ -2313,7 +2309,8 @@ def is_monotonic_increasing(self) -> bool:
     @property
     def is_monotonic_decreasing(self) -> bool:
         """
-        Return a boolean if the values are equal or decreasing.
+        Return whether the index is monotonic decreasing (only equal or
+        decreasing values).
 
         Examples
         --------
@@ -3815,9 +3812,8 @@ def get_loc(self, key, method=None, tolerance=None):
     _index_shared_docs[
         "get_indexer"
     ] = """
-        Compute indexer and mask for new index given the current index.
-
-        The indexer should be then used as an input to ndarray.take to align the
+        Compute indexer and mask for new index given the current index. The
+        indexer should then be used as an input to ndarray.take to align the
         current data to the new index.
 
         Parameters
@@ -4586,7 +4582,8 @@ def join(
         sort: bool = False,
     ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
         """
-        Compute join_index and indexers to conform data structures to the new index.
+        Compute join_index and indexers to conform data structures
+        to the new index.
 
         Parameters
         ----------
@@ -4687,7 +4684,6 @@ def join(
                 not isinstance(self, ABCMultiIndex)
                 or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
             )
-            and not is_categorical_dtype(self.dtype)
         ):
             # Categorical is monotonic if data are ordered as categories, but join can
             #  not handle this in case of not lexicographically monotonic GH#38502
@@ -5983,9 +5979,8 @@ def set_value(self, arr, key, value) -> None:
     _index_shared_docs[
         "get_indexer_non_unique"
     ] = """
-        Compute indexer and mask for new index given the current index.
-
-        The indexer should be then used as an input to ndarray.take to align the
+        Compute indexer and mask for new index given the current index. The
+        indexer should then be used as an input to ndarray.take to align the
         current data to the new index.
 
         Parameters
@@ -7283,7 +7278,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
     return MultiIndex.from_arrays(sequences, names=names)
 
 
-def ensure_index(index_like: Axes, copy: bool = False) -> Index:
+def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Index:
     """
     Ensure that we have an index from some index-like object.
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index e3d81e01ac94c..d3138c0ad682a 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1982,6 +1982,14 @@ def _get_timestamp_range_edges(
     -------
     A tuple of length 2, containing the adjusted pd.Timestamp objects.
     """
+    if isinstance(origin, Timestamp):
+        # GH 47653: honour an explicit Timestamp origin for all frequencies
+        # by taking the anchored path directly.
+        first, last = _adjust_dates_anchored(
+            first, last, freq, closed=closed, origin=origin, offset=offset
+        )
+        return first, last
+
     if isinstance(freq, Tick):
         index_tz = first.tz
         if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
@@ -2116,7 +2124,12 @@ def _adjust_dates_anchored(
         origin_nanos = origin.value
     elif origin in ["end", "end_day"]:
         origin = last if origin == "end" else last.ceil("D")
-        sub_freq_times = (origin.value - first.value) // freq.nanos
+        if isinstance(freq, Tick):
+            sub_freq_times = (origin.value - first.value) // freq.nanos
+        else:
+            # non-Tick offsets have no fixed nanosecond length, so
+            # freq.nanos is unavailable here
+            sub_freq_times = origin.value - first.value
         if closed == "left":
             sub_freq_times += 1
         first = origin - sub_freq_times * freq
@@ -2133,19 +2146,29 @@ def _adjust_dates_anchored(
     if last_tzinfo is not None:
         last = last.tz_convert("UTC")
 
-    foffset = (first.value - origin_nanos) % freq.nanos
-    loffset = (last.value - origin_nanos) % freq.nanos
+    if isinstance(freq, Tick):
+        foffset = (first.value - origin_nanos) % freq.nanos
+        loffset = (last.value - origin_nanos) % freq.nanos
+    else:
+        foffset = first.value - origin_nanos
+        loffset = last.value - origin_nanos
 
     if closed == "right":
         if foffset > 0:
             # roll back
             fresult_int = first.value - foffset
         else:
-            fresult_int = first.value - freq.nanos
+            if isinstance(freq, Tick):
+                fresult_int = first.value - freq.nanos
+            else:
+                fresult_int = first.value
 
         if loffset > 0:
-            # roll forward
-            lresult_int = last.value + (freq.nanos - loffset)
+            if isinstance(freq, Tick):
+                # roll forward
+                lresult_int = last.value + (freq.nanos - loffset)
+            else:
+                lresult_int = last.value - loffset
         else:
             # already the end of the road
             lresult_int = last.value
@@ -2157,10 +2180,16 @@ def _adjust_dates_anchored(
             fresult_int = first.value
 
         if loffset > 0:
-            # roll forward
-            lresult_int = last.value + (freq.nanos - loffset)
+            if isinstance(freq, Tick):
+                # roll forward
+                lresult_int = last.value + (freq.nanos - loffset)
+            else:
+                lresult_int = last.value - loffset
         else:
-            lresult_int = last.value + freq.nanos
+            if isinstance(freq, Tick):
+                lresult_int = last.value + freq.nanos
+            else:
+                lresult_int = last.value
     fresult = Timestamp(fresult_int)
     lresult = Timestamp(lresult_int)
     if first_tzinfo is not None:
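A rough sketch of the code path the resample.py hunks change, assuming the patched build; the dates and values are illustrative:

```python
import pandas as pd

idx = pd.date_range("2022-01-02", "2022-02-01", freq="D")
s = pd.Series(range(len(idx)), index=idx)

# "W" is a non-fixed (non-Tick) frequency, so freq.nanos is undefined; the
# hunks above branch on isinstance(freq, Tick) to avoid touching it. An
# explicit Timestamp origin now routes through _adjust_dates_anchored.
print(s.resample("W", origin=pd.Timestamp("2022-01-05")).sum())
```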
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d290aada18293..74b01327cb28e 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -39,6 +39,26 @@ def test_repr():
     assert result == expected
 
 
+def test_origin_param_no_effect():
+    # GH 47653
+    df = DataFrame(
+        [
+            {"A": A, "datadate": datadate}
+            for A in range(1, 3)
+            for datadate in date_range(start="1/2/2022", end="2/1/2022", freq="D")
+        ]
+    )
+
+    result = df.groupby(
+        ["A", Grouper(key="datadate", freq="W", origin="start")]
+    ).sum()
+    expected = df.groupby(
+        ["A", Grouper(key="datadate", freq="W", origin="1/5/2022")]
+    ).sum()
+
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"])
 def test_basic(dtype):
 
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 8aff217cca5c1..148d01662bf27 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -45,6 +45,18 @@ async def test_tab_complete_ipython6_warning(ip):
     list(ip.Completer.completions("rs.", 1))
 
 
+def test_dataframe_missing_a_day():
+    # GH 47350: idxmax used to raise ValueError on the empty bin that
+    # daily resampling inserts for the missing 2022-01-03
+    dates = pd.DatetimeIndex(["2022-01-01", "2022-01-02", "2022-01-04"])
+    df = DataFrame([0, 1, 2], index=dates)
+
+    result = df.resample("D")[0].idxmax()
+    expected = df.resample("D")[0].apply(lambda x: x.idxmax() if len(x) else None)
+
+    tm.assert_series_equal(result, expected)
+
+
 def test_deferred_with_groupby():
     # GH 12486
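For reference, a sketch of the groupby pattern the new test exercises, assuming the patched behavior; the `value` column and the aggregation are illustrative additions, not part of the patch:

```python
import pandas as pd
from pandas import DataFrame, Grouper, date_range

df = DataFrame(
    {
        "A": [a for a in range(1, 3) for _ in range(31)],
        "datadate": list(date_range("1/2/2022", "2/1/2022", freq="D")) * 2,
        "value": range(62),
    }
)

# With the fix, an explicit origin passed through Grouper reaches the
# resampling edge computation instead of being silently ignored.
grouper = Grouper(key="datadate", freq="W", origin="1/5/2022")
print(df.groupby(["A", grouper])["value"].sum())
```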