
Commit a59e076

Merge remote-tracking branch 'upstream/master' into debug/resourcewarning
2 parents 9d23ff4 + c3a8fcd

File tree

16 files changed (+131, -111 lines)


.github/workflows/posix.yml

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ jobs:
           [actions-38-slow.yaml, "slow", "", "", "", "", ""],
           [actions-38-locale.yaml, "not slow and not network", "language-pack-zh-hans xsel", "zh_CN.utf8", "zh_CN.utf8", "", ""],
           [actions-39-slow.yaml, "slow", "", "", "", "", ""],
+          [actions-pypy-38.yaml, "not slow and not clipboard", "", "", "", "", ""],
           [actions-39-numpydev.yaml, "not slow and not network", "xsel", "", "", "deprecate", "-W error"],
           [actions-39.yaml, "not slow and not clipboard", "", "", "", "", ""]
         ]

ci/deps/actions-pypy-38.yaml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+name: pandas-dev
+channels:
+  - conda-forge
+dependencies:
+  # TODO: Add the rest of the dependencies in here
+  # once the other plentiful failures/segfaults
+  # with base pandas has been dealt with
+  - python=3.8
+
+  # tools
+  - cython>=0.29.24
+  - pytest>=6.0
+  - pytest-cov
+  - pytest-xdist>=1.31
+  - hypothesis>=5.5.3
+
+  # required
+  - numpy
+  - python-dateutil
+  - pytz

doc/source/whatsnew/v1.3.5.rst

Lines changed: 0 additions & 1 deletion
@@ -22,7 +22,6 @@ Fixed regressions
 - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
 - Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
 - Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`)
-- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)


 .. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions
@@ -702,6 +702,7 @@ Indexing
 - Bug in :meth:`DataFrame.loc.__setitem__` changing dtype when indexer was completely ``False`` (:issue:`37550`)
 - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`)
 - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
+- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
 -

 Missing
@@ -710,6 +711,7 @@ Missing
 - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
 - Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`)
 - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`)
+- Bug in :meth:`Index.fillna` incorrectly returning an un-filled :class:`Index` when NA values are present and ``downcast`` argument is specified. This now raises ``NotImplementedError`` instead; do not pass ``downcast`` argument (:issue:`44873`)
 -

 MultiIndex
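
The first added entry above restores coercion of a single-column ``np.matrix`` on column assignment. A minimal sketch of the restored behaviour (column names are illustrative, assuming pandas at this commit):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.zeros((3, 1)), columns=["a"])

# A single-column np.matrix is again coerced to a 1d np.ndarray when assigned
# as a DataFrame column (GH 42376).
df["b"] = np.matrix(np.arange(3)).reshape(3, 1)
print(df["b"].to_numpy().ndim)  # expected: 1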

pandas/_typing.py

Lines changed: 3 additions & 1 deletion
@@ -242,7 +242,9 @@ def closed(self) -> bool:

 # compression keywords and compression
 CompressionDict = Dict[str, Any]
-CompressionOptions = Optional[Union[str, CompressionDict]]
+CompressionOptions = Optional[
+    Union[Literal["infer", "gzip", "bz2", "zip", "xz"], CompressionDict]
+]


 # types in DataFrameFormatter
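
For illustration, the narrowed alias can also annotate user code; ``load_csv`` below is a hypothetical wrapper, not part of this change, and importing from the private ``pandas._typing`` module is shown only as a sketch:

import pandas as pd
from pandas._typing import CompressionOptions

def load_csv(path: str, compression: CompressionOptions = "infer") -> pd.DataFrame:
    # "infer", "gzip", "bz2", "zip", "xz", a CompressionDict such as
    # {"method": "gzip", "compresslevel": 1}, or None all satisfy the alias.
    return pd.read_csv(path, compression=compression)

# load_csv("data.csv", compression="7z")  # flagged by mypy: '7z' is not an allowed Literal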

pandas/core/arrays/numpy_.py

Lines changed: 13 additions & 12 deletions
@@ -18,6 +18,7 @@
 from pandas.core.dtypes.missing import isna

 from pandas.core import (
+    arraylike,
     nanops,
     ops,
 )
@@ -144,6 +145,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         if result is not NotImplemented:
             return result

+        if method == "reduce":
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                # e.g. tests.series.test_ufunc.TestNumpyReductions
+                return result
+
         # Defer to the implementation of the ufunc on unwrapped values.
         inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
         if out:
@@ -153,13 +162,8 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         result = getattr(ufunc, method)(*inputs, **kwargs)

         if ufunc.nout > 1:
-            # multiple return values
-            if not lib.is_scalar(result[0]):
-                # re-box array-like results
-                return tuple(type(self)(x) for x in result)
-            else:
-                # but not scalar reductions
-                return result
+            # multiple return values; re-box array-like results
+            return tuple(type(self)(x) for x in result)
         elif method == "at":
             # no return value
             return None
@@ -171,11 +175,8 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
             # e.g. test_np_max_nested_tuples
             return result
         else:
-            # one return value
-            if not lib.is_scalar(result):
-                # re-box array-like results, but not scalar reductions
-                result = type(self)(result)
-            return result
+            # one return value; re-box array-like results
+            return type(self)(result)

     # ------------------------------------------------------------------------
     # Pandas ExtensionArray Interface
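
A rough sketch of the effect of the new ``reduce`` dispatch, as exercised by tests.series.test_ufunc.TestNumpyReductions (output comments are expectations, not captured output):

import numpy as np
import pandas as pd

arr = pd.arrays.PandasArray(np.array([1.0, 2.0, 3.0]))

# Reduction ufuncs are now routed through arraylike.dispatch_reduction_ufunc,
# i.e. handled by the array's own reductions (sum, max, ...) rather than by
# calling the ufunc on the unwrapped ndarray.
print(np.add.reduce(arr))      # expected: 6.0
print(np.maximum.reduce(arr))  # expected: 3.0

# Ufuncs with multiple outputs now unconditionally re-box array results.
frac, whole = np.modf(arr)
print(type(frac).__name__, type(whole).__name__)  # expected: PandasArray PandasArray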

pandas/core/computation/scope.py

Lines changed: 5 additions & 12 deletions
@@ -133,18 +133,13 @@ def __init__(
             # shallow copy here because we don't want to replace what's in
             # scope when we align terms (alignment accesses the underlying
             # numpy array of pandas objects)
-
-            # error: Incompatible types in assignment (expression has type
-            # "ChainMap[str, Any]", variable has type "DeepChainMap[str, Any]")
-            self.scope = self.scope.new_child(  # type: ignore[assignment]
-                (global_dict or frame.f_globals).copy()
-            )
+            scope_global = self.scope.new_child((global_dict or frame.f_globals).copy())
+            self.scope = DeepChainMap(scope_global)
             if not isinstance(local_dict, Scope):
-                # error: Incompatible types in assignment (expression has type
-                # "ChainMap[str, Any]", variable has type "DeepChainMap[str, Any]")
-                self.scope = self.scope.new_child(  # type: ignore[assignment]
+                scope_local = self.scope.new_child(
                     (local_dict or frame.f_locals).copy()
                 )
+                self.scope = DeepChainMap(scope_local)
         finally:
             del frame

@@ -257,9 +252,7 @@ def _get_vars(self, stack, scopes: list[str]) -> None:
         for scope, (frame, _, _, _, _, _) in variables:
             try:
                 d = getattr(frame, "f_" + scope)
-                # error: Incompatible types in assignment (expression has type
-                # "ChainMap[str, Any]", variable has type "DeepChainMap[str, Any]")
-                self.scope = self.scope.new_child(d)  # type: ignore[assignment]
+                self.scope = DeepChainMap(self.scope.new_child(d))
             finally:
                 # won't remove it, but DECREF it
                 # in Py3 this probably isn't necessary since frame won't be

pandas/core/indexes/base.py

Lines changed: 6 additions & 1 deletion
@@ -2723,13 +2723,18 @@ def fillna(self, value=None, downcast=None):
         DataFrame.fillna : Fill NaN values of a DataFrame.
         Series.fillna : Fill NaN Values of a Series.
         """
+
         value = self._require_scalar(value)
         if self.hasnans:
             result = self.putmask(self._isnan, value)
             if downcast is None:
                 # no need to care metadata other than name
-                # because it can't have freq if
+                # because it can't have freq if it has NaTs
                 return Index._with_infer(result, name=self.name)
+            raise NotImplementedError(
+                f"{type(self).__name__}.fillna does not support 'downcast' "
+                "argument values other than 'None'."
+            )
         return self._view()

     def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
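
A minimal sketch of the behaviour enforced by the new raise; it only triggers when NA values are present, matching the test added to pandas/tests/indexes/common.py below:

import numpy as np
import pandas as pd

idx = pd.Index([1.0, np.nan, 3.0])

# Plain fillna (downcast=None) is unchanged.
print(idx.fillna(0.0))

# Passing downcast while NA values are present now raises instead of
# silently returning the un-filled Index (GH 44873).
try:
    idx.fillna(0.0, downcast="infer")
except NotImplementedError as err:
    print(err)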

pandas/core/indexes/category.py

Lines changed: 0 additions & 14 deletions
@@ -377,20 +377,6 @@ def __contains__(self, key: Any) -> bool:

         return contains(self, key, container=self._engine)

-    @doc(Index.fillna)
-    def fillna(self, value, downcast=None):
-        value = self._require_scalar(value)
-        try:
-            cat = self._data.fillna(value)
-        except (ValueError, TypeError):
-            # invalid fill_value
-            if not self.hasnans:
-                # nothing to fill, we can get away without casting
-                return self.copy()
-            return self.astype(object).fillna(value, downcast=downcast)
-
-        return type(self)._simple_new(cat, name=self.name)
-
     # TODO(2.0): remove reindex once non-unique deprecation is enforced
     def reindex(
         self, target, method=None, level=None, limit=None, tolerance=None
pandas/io/parsers/readers.py

Lines changed: 3 additions & 2 deletions
@@ -19,6 +19,7 @@
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas._typing import (
     ArrayLike,
+    CompressionOptions,
     DtypeArg,
     FilePath,
     ReadCsvBuffer,
@@ -618,7 +619,7 @@ def read_csv(
     iterator=False,
     chunksize=None,
     # Quoting, Compression, and File Format
-    compression="infer",
+    compression: CompressionOptions = "infer",
     thousands=None,
     decimal: str = ".",
     lineterminator=None,
@@ -716,7 +717,7 @@ def read_table(
     iterator=False,
     chunksize=None,
     # Quoting, Compression, and File Format
-    compression="infer",
+    compression: CompressionOptions = "infer",
     thousands=None,
     decimal: str = ".",
     lineterminator=None,
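
The annotation documents what ``read_csv``/``read_table`` already accept at runtime; a small sketch (file name is illustrative):

import pandas as pd

pd.DataFrame({"a": [1, 2, 3]}).to_csv("data.csv.gz", index=False, compression="gzip")

# Both a Literal string and a CompressionDict satisfy the new annotation.
df1 = pd.read_csv("data.csv.gz", compression="infer")
df2 = pd.read_csv("data.csv.gz", compression={"method": "gzip"})
print(df1.equals(df2))  # expected: True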

pandas/io/xml.py

Lines changed: 6 additions & 6 deletions
@@ -105,8 +105,8 @@ def __init__(
         names,
         encoding,
         stylesheet,
-        compression,
-        storage_options,
+        compression: CompressionOptions,
+        storage_options: StorageOptions,
     ) -> None:
         self.path_or_buffer = path_or_buffer
         self.xpath = xpath
@@ -570,8 +570,8 @@ def _transform_doc(self) -> bytes:
 def get_data_from_filepath(
     filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
     encoding,
-    compression,
-    storage_options,
+    compression: CompressionOptions,
+    storage_options: StorageOptions,
 ) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
     """
     Extract raw XML data.
@@ -666,8 +666,8 @@ def _parse(
     encoding,
     parser,
     stylesheet,
-    compression,
-    storage_options,
+    compression: CompressionOptions,
+    storage_options: StorageOptions,
     **kwargs,
 ) -> DataFrame:
     """

pandas/tests/indexes/categorical/test_fillna.py

Lines changed: 7 additions & 5 deletions
@@ -25,17 +25,19 @@ def test_fillna_categorical(self):
         tm.assert_index_equal(result, expected)

     def test_fillna_copies_with_no_nas(self):
-        # Nothing to fill, should still get a copy
+        # Nothing to fill, should still get a copy for the Categorical method,
+        # but OK to get a view on CategoricalIndex method
         ci = CategoricalIndex([0, 1, 1])
-        cat = ci._data
         result = ci.fillna(0)
-        assert result._values._ndarray is not cat._ndarray
-        assert result._values._ndarray.base is None
+        assert result is not ci
+        assert tm.shares_memory(result, ci)

-        # Same check directly on the Categorical object
+        # But at the EA level we always get a copy.
+        cat = ci._data
         result = cat.fillna(0)
         assert result._ndarray is not cat._ndarray
         assert result._ndarray.base is None
+        assert not tm.shares_memory(result, cat)

     def test_fillna_validates_with_no_nas(self):
         # We validate the fill value even if fillna is a no-op

pandas/tests/indexes/common.py

Lines changed: 5 additions & 0 deletions
@@ -516,6 +516,11 @@ def test_fillna(self, index):

         idx = type(index)(values)

+        msg = "does not support 'downcast'"
+        with pytest.raises(NotImplementedError, match=msg):
+            # For now at least, we only raise if there are NAs present
+            idx.fillna(idx[0], downcast="infer")
+
         expected = np.array([False] * len(idx), dtype=bool)
         expected[1] = True
         tm.assert_numpy_array_equal(idx._isnan, expected)

pandas/tests/io/xml/test_to_xml.py

Lines changed: 6 additions & 1 deletion
@@ -1311,7 +1311,12 @@ def test_filename_and_suffix_comp(parser, comp, compfile):
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            geom_df.to_xml(path, parser=parser, compression="7z")
+            # Argument "compression" to "to_xml" of "DataFrame" has incompatible type
+            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
+            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
+            geom_df.to_xml(
+                path, parser=parser, compression="7z"  # type: ignore[arg-type]
+            )


 # STORAGE OPTIONS

pandas/tests/io/xml/test_xml.py

Lines changed: 4 additions & 1 deletion
@@ -1069,7 +1069,10 @@ def test_wrong_compression_zip(parser, comp):
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            read_xml(path, parser=parser, compression="7z")
+            # error: Argument "compression" to "read_xml" has incompatible type
+            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
+            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
+            read_xml(path, parser=parser, compression="7z")  # type: ignore[arg-type]


 # STORAGE OPTIONS
