From 2b4499f10588056c0ca69cdb75db6d41be2f7d5c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Dec 2021 19:55:03 -0800 Subject: [PATCH 1/7] Fix docstring, use checknull, closes #32206 --- pandas/_libs/hashtable_class_helper.pxi.in | 4 +--- pandas/core/construction.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 77d3f954a9a5d..0446b675e07d7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1228,9 +1228,7 @@ cdef class PyObjectHashTable(HashTable): hash(val) if ignore_na and ( - (val is C_NA) - or (val != val) - or (val is None) + checknull(val) or (use_na_value and val == na_value) ): # if missing values do not count as unique values (i.e. if diff --git a/pandas/core/construction.py b/pandas/core/construction.py index cf8cd070ec562..17fa2d6e2f388 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -388,10 +388,9 @@ def extract_array( ---------- obj : object For Series / Index, the underlying ExtensionArray is unboxed. - For Numpy-backed ExtensionArrays, the ndarray is extracted. extract_numpy : bool, default False - Whether to extract the ndarray from a PandasArray + Whether to extract the ndarray from a PandasArray. 
extract_range : bool, default False If we have a RangeIndex, return range._values if True From 6f2a8534d1f9a0b1b4d2cda0880a84b08caa63d4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Dec 2021 17:11:50 -0800 Subject: [PATCH 2/7] comments --- pandas/core/algorithms.py | 3 +-- pandas/core/arrays/base.py | 5 +++++ pandas/core/arrays/string_arrow.py | 2 ++ pandas/core/dtypes/base.py | 4 ++++ setup.py | 4 ++-- 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b12e5be7722d0..5e82e4e1d2426 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -119,7 +119,7 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: This will coerce: - ints -> int64 - uint -> uint64 - - bool -> uint64 (TODO this should be uint8) + - bool -> uint8 - datetimelike -> i8 - datetime64tz -> i8 (in local tz) - categorical -> codes @@ -899,7 +899,6 @@ def value_counts_arraylike(values, dropna: bool): original = values values = _ensure_data(values) - # TODO: handle uint8 keys, counts = htable.value_count(values, dropna) if needs_i8_conversion(original.dtype): diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fc915f5f84d8b..c27ed13d6dd1e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1500,6 +1500,11 @@ def _fill_mask_inplace( def _empty(cls, shape: Shape, dtype: ExtensionDtype): """ Create an ExtensionArray with the given shape and dtype. + + See Also + -------- + ExtensionDtype.empty + ExtensionDtype.empty is the 'official' public version of this API. """ obj = cls._from_sequence([], dtype=dtype) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 53fc38a973110..42dff27866d84 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -355,6 +355,8 @@ def copy(self) -> ArrowStringArray: """ Return a shallow copy of the array. 
+ Underlying ChunkedArray is immutable, so a deep copy is unnecessary. + Returns ------- ArrowStringArray diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index afd0d69e7e829..fa07b5fea5ea3 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -215,6 +215,10 @@ def empty(self, shape: Shape) -> type_t[ExtensionArray]: Analogous to numpy.empty. + Parameters + ---------- + shape : int or tuple[int] + Returns ------- ExtensionArray diff --git a/setup.py b/setup.py index ca71510c5f051..db65ea72e4a96 100755 --- a/setup.py +++ b/setup.py @@ -392,8 +392,8 @@ def run(self): # ---------------------------------------------------------------------- # Specification of Dependencies -# TODO: Need to check to see if e.g. `linetrace` has changed and possibly -# re-compile. +# TODO(cython#4518): Need to check to see if e.g. `linetrace` has changed and +# possibly re-compile. def maybe_cythonize(extensions, *args, **kwargs): """ Render tempita templates before calling cythonize. 
This is skipped for From 15a5ba17aa50b80c2df838779d9129a2468b9ab0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Dec 2021 17:12:10 -0800 Subject: [PATCH 3/7] test for tm.shares_memory --- pandas/_testing/__init__.py | 2 ++ pandas/tests/util/test_shares_memory.py | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 pandas/tests/util/test_shares_memory.py diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 5154626dc3c7c..2e9f92ebc7cb7 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -1078,6 +1078,8 @@ def shares_memory(left, right) -> bool: return shares_memory(left._ndarray, right) if isinstance(left, pd.core.arrays.SparseArray): return shares_memory(left.sp_values, right) + if isinstance(left, pd.core.arrays.IntervalArray): + return shares_memory(left._left, right) or shares_memory(left._right, right) if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]": # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py new file mode 100644 index 0000000000000..ed8227a5c4307 --- /dev/null +++ b/pandas/tests/util/test_shares_memory.py @@ -0,0 +1,13 @@ +import pandas as pd +import pandas._testing as tm + + +def test_shares_memory_interval(): + obj = pd.interval_range(1, 5) + + assert tm.shares_memory(obj, obj) + assert tm.shares_memory(obj, obj._data) + assert tm.shares_memory(obj, obj[::-1]) + assert tm.shares_memory(obj, obj[:2]) + + assert not tm.shares_memory(obj, obj._data.copy()) From 969c233f16f97eefb1bc22062ff3196ebb62e817 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Dec 2021 16:15:16 -0800 Subject: [PATCH 4/7] Clarify that isnull/notnull are aliases GH#41855 --- pandas/core/frame.py | 6 ++++++ pandas/core/series.py | 6 ++++++ pandas/tests/base/test_misc.py | 13 +++++++++++++ 3 files changed, 25 insertions(+) diff --git a/pandas/core/frame.py 
b/pandas/core/frame.py index 03c9addefecc0..794fb2afc7f9e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5847,6 +5847,9 @@ def isna(self) -> DataFrame: @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> DataFrame: + """ + DataFrame.isnull is an alias for DataFrame.isna. + """ return self.isna() @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) @@ -5855,6 +5858,9 @@ def notna(self) -> DataFrame: @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> DataFrame: + """ + DataFrame.notnull is an alias for DataFrame.notna. + """ return ~self.isna() @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 15805c0aa94ed..746512e8fb7d6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5231,6 +5231,9 @@ def isna(self) -> Series: # error: Cannot determine type of 'isna' @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] def isnull(self) -> Series: + """ + Series.isnull is an alias for Series.isna. + """ return super().isnull() # error: Cannot determine type of 'notna' @@ -5241,6 +5244,9 @@ def notna(self) -> Series: # error: Cannot determine type of 'notna' @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] def notnull(self) -> Series: + """ + Series.notnull is an alias for Series.notna. 
+ """ return super().notnull() @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 8372ec92ec26e..e4fbbc07c688a 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -21,6 +21,19 @@ import pandas._testing as tm +def test_isnull_notnull_docstrings(): + # GH#41855 make sure it's clear these are aliases + doc = pd.DataFrame.notnull.__doc__ + assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n") + doc = pd.DataFrame.isnull.__doc__ + assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n") + + doc = Series.notnull.__doc__ + assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n") + doc = Series.isnull.__doc__ + assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n") + + @pytest.mark.parametrize( "op_name, op", [ From 59d37b0ba5e03dcd36917e941a1a69d7219bc39f Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 25 Dec 2021 15:55:43 -0800 Subject: [PATCH 5/7] DEPR: NaT.freq --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/_libs/tslibs/nattype.pxd | 1 - pandas/_libs/tslibs/nattype.pyx | 10 +++++++++- pandas/tests/scalar/test_nat.py | 5 +++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ccad93d83eb5b..09ec8eef95825 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -547,7 +547,7 @@ Other Deprecations - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) -- +- Deprecated :meth:`NaT.freq` (:issue:`??`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd index b7c14e0a5b068..5e5f4224f902f 100644 --- a/pandas/_libs/tslibs/nattype.pxd +++ b/pandas/_libs/tslibs/nattype.pxd @@ -10,7 +10,6 @@ cdef set c_nat_strings cdef class _NaT(datetime): cdef readonly: int64_t value - object freq cdef _NaT c_NaT diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 141bc01716c4d..ab832c145a052 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -357,10 +357,18 @@ class NaTType(_NaT): base = _NaT.__new__(cls, 1, 1, 1) base.value = NPY_NAT - base.freq = None return base + @property + def freq(self): + warnings.warn( + "NaT.freq is deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + return None + def __reduce_ex__(self, protocol): # python 3.6 compat # https://bugs.python.org/issue28730 diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index cfb3b504f7a79..7850a20efc878 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -718,3 +718,8 @@ def test_pickle(): # GH#4606 p = tm.round_trip_pickle(NaT) assert p is NaT + + +def test_freq_deprecated(): + with tm.assert_produces_warning(FutureWarning, match="deprecated"): + NaT.freq From c925878d8901185a249370ae562593dee1a48f7f Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 25 Dec 2021 16:06:02 -0800 Subject: [PATCH 6/7] catch warning --- pandas/core/reshape/pivot.py | 4 +--- pandas/tests/tools/test_to_datetime.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index edd3599aabe35..8949ad3c8fca0 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -221,9 +221,7 @@ def __internal_pivot_table( table = table.sort_index(axis=1) if fill_value is not None: - _table = 
table.fillna(fill_value, downcast="infer") - assert _table is not None # needed for mypy - table = _table + table = table.fillna(fill_value, downcast="infer") if margins: if dropna: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 013af7eb90cd3..2adb9933da662 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1649,7 +1649,8 @@ def test_to_datetime_respects_dayfirst(self, cache): with pytest.raises(ValueError, match=msg): # if dayfirst is respected, then this would parse as month=13, which # would raise - to_datetime("01-13-2012", dayfirst=True, cache=cache) + with tm.assert_produces_warning(UserWarning, match="Provide format"): + to_datetime("01-13-2012", dayfirst=True, cache=cache) def test_to_datetime_on_datetime64_series(self, cache): # #2699 From 4ac420c30414d732ad9d383b70ca215e1e2842ea Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 25 Dec 2021 16:12:02 -0800 Subject: [PATCH 7/7] gh ref --- doc/source/whatsnew/v1.4.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 09ec8eef95825..adffeec3f533a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -547,7 +547,8 @@ Other Deprecations - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) -- Deprecated :meth:`NaT.freq` (:issue:`??`) +- Deprecated :meth:`NaT.freq` (:issue:`45071`) +- .. ---------------------------------------------------------------------------