From 3bbffb4f68dbdded83940e7c7288b819051506ad Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 3 Jun 2022 17:13:46 -0700 Subject: [PATCH 1/5] Address test warnings --- pandas/tests/frame/test_arithmetic.py | 8 +++--- pandas/tests/groupby/aggregate/test_numba.py | 4 --- pandas/tests/groupby/test_numba.py | 2 -- pandas/tests/groupby/transform/test_numba.py | 4 --- pandas/tests/strings/test_find_replace.py | 30 ++++++++++++++++---- pandas/tests/strings/test_strings.py | 6 +++- pandas/tests/window/test_numba.py | 4 --- pandas/tests/window/test_online.py | 1 - pyproject.toml | 3 +- 9 files changed, 36 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3adc63e1a27f6..0864032b741c9 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2007,11 +2007,11 @@ def test_bool_frame_mult_float(): tm.assert_frame_equal(result, expected) -def test_frame_sub_nullable_int(any_int_dtype): +def test_frame_sub_nullable_int(any_int_ea_dtype): # GH 32822 - series1 = Series([1, 2, np.nan], dtype=any_int_dtype) - series2 = Series([1, 2, 3], dtype=any_int_dtype) - expected = DataFrame([0, 0, np.nan], dtype=any_int_dtype) + series1 = Series([1, 2, None], dtype=any_int_ea_dtype) + series2 = Series([1, 2, 3], dtype=any_int_ea_dtype) + expected = DataFrame([0, 0, None], dtype=any_int_ea_dtype) result = series1.to_frame() - series2.to_frame() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index 9f71c2c2fa0b6..b055cf51e50e0 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -47,8 +47,6 @@ def incorrect_function(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -76,8 +74,6 @@ def func_numba(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py index cce92e0763fb8..89d3524f471c2 100644 --- a/pandas/tests/groupby/test_numba.py +++ b/pandas/tests/groupby/test_numba.py @@ -10,8 +10,6 @@ @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEngine: def test_cython_vs_numba_frame( self, sort, nogil, parallel, nopython, numba_supported_reductions diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 1b8570dbdc21d..920af4197b612 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -44,8 +44,6 @@ def incorrect_function(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -73,8 +71,6 @@ def func(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index a6e51cc2f98d6..1c74950e30c40 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -25,7 +25,11 @@ def test_contains(any_string_dtype): values = Series(values, dtype=any_string_dtype) pat = "mmm[_]+" - result = values.str.contains(pat) + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = values.str.contains(pat) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series( np.array([False, np.nan, True, True, False], dtype=np.object_), @@ -88,7 +92,11 @@ def test_contains(any_string_dtype): ) tm.assert_series_equal(result, expected) - result = values.str.contains(pat, na=False) + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = values.str.contains(pat, na=False) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -181,7 +189,11 @@ def test_contains_moar(any_string_dtype): dtype=any_string_dtype, ) - result = s.str.contains("a") + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = s.str.contains("a") expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series( [False, False, False, True, True, False, np.nan, False, False, True], @@ -619,7 +631,11 @@ def test_replace_moar(any_string_dtype): dtype=any_string_dtype, ) - result = ser.str.replace("A", "YYY") + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = ser.str.replace("A", "YYY") expected = Series( ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"], dtype=any_string_dtype, @@ -727,7 +743,11 @@ def test_replace_regex_single_character(regex, any_string_dtype): ): result = s.str.replace(".", "a", regex=regex) else: - result = s.str.replace(".", "a", regex=regex) + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = s.str.replace(".", "a", regex=regex) expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index db99ba8368a8a..aa31a5505b866 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -562,7 +562,11 @@ def test_slice_replace(start, stop, repl, expected, any_string_dtype): def test_strip_lstrip_rstrip(any_string_dtype, method, exp): ser = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype) - result = getattr(ser.str, method)() + with tm.maybe_produces_warning( + PerformanceWarning, + any_string_dtype == "string[pyarrow]" and pa_version_under4p0, + ): + result = getattr(ser.str, method)() expected = Series(exp, dtype=any_string_dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a029c88fa3a7d..409e8b3b353da 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -50,8 +50,6 @@ def arithmetic_numba_supported_operators(request): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEngine: @pytest.mark.parametrize("jit", [True, False]) def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step): @@ -331,8 +329,6 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba") @pytest.mark.slow -@pytest.mark.filterwarnings("ignore:\n") -# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestTableMethod: def test_table_series_valueerror(self): def f(x): diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index b98129e1b07ec..b32a5f65a7f2a 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -24,7 +24,6 @@ @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore:\n") class TestEWM: def test_invalid_update(self): df = DataFrame({"a": range(5), "b": range(5)}) diff --git a/pyproject.toml b/pyproject.toml index 2f09b003defc6..ecadf91685433 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ filterwarnings = [ "ignore:pandas.util.testing is deprecated:FutureWarning:importlib", # Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758 "ignore:`np.MachAr` is deprecated:DeprecationWarning:numba", - + # Filter warnings when parallel=True and the function expectedly can't be parallelized by Numba + "ignore::numba.NumbaPerformanceWarning", ] junit_family = "xunit2" markers = [ From e45dd60441880a321c53440c3bb298b43a8e860d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 6 Jun 2022 10:00:00 -0700 Subject: [PATCH 2/5] Fix another test warning --- pandas/tests/io/parser/test_parse_dates.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 240b4b725aacd..449d5a954613b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1538,7 +1538,12 @@ def test_date_parser_resolution_if_not_ns(all_parsers): """ def date_parser(dt, time): - return np.array(dt + "T" + time, dtype="datetime64[s]") + try: + arr = dt + "T" + time + except TypeError: + # dt & time are date/time objects + arr = [datetime.combine(d, t) for d, t in zip(dt, time)] + return np.array(arr, dtype="datetime64[s]") result = parser.read_csv( StringIO(data), From d3c0306a3d6d6cb60e722663c7c30ab25471c423 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 6 Jun 2022 10:21:53 -0700 Subject: [PATCH 3/5] Use filterwarnings again --- pandas/tests/groupby/aggregate/test_numba.py | 2 ++ pandas/tests/groupby/test_numba.py | 1 + pandas/tests/groupby/transform/test_numba.py | 2 ++ pandas/tests/window/test_numba.py | 2 ++ pandas/tests/window/test_online.py | 1 + pyproject.toml | 2 -- 6 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index b055cf51e50e0..64af64651e824 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -47,6 +47,7 @@ def incorrect_function(values, index): @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -74,6 +75,7 @@ def func_numba(values, index): @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py index 89d3524f471c2..c6415708ae3e7 100644 --- a/pandas/tests/groupby/test_numba.py +++ b/pandas/tests/groupby/test_numba.py @@ -10,6 +10,7 @@ @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") class TestEngine: def test_cython_vs_numba_frame( self, sort, nogil, parallel, nopython, numba_supported_reductions diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 920af4197b612..214a59d30f5bb 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -44,6 +44,7 @@ def incorrect_function(values, index): @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -71,6 +72,7 @@ def func(values, index): @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 409e8b3b353da..7a50be8fa2c9d 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -50,6 +50,7 @@ def arithmetic_numba_supported_operators(request): @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") class TestEngine: @pytest.mark.parametrize("jit", [True, False]) def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step): @@ -329,6 +330,7 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba") @pytest.mark.slow +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") class TestTableMethod: def test_table_series_valueerror(self): def f(x): diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index b32a5f65a7f2a..5b53773494b6c 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -24,6 +24,7 @@ @td.skip_if_no("numba") +@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") class TestEWM: def test_invalid_update(self): df = DataFrame({"a": range(5), "b": range(5)}) diff --git a/pyproject.toml b/pyproject.toml index ecadf91685433..0e2e41fba461c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,8 +50,6 @@ filterwarnings = [ "ignore:pandas.util.testing is deprecated:FutureWarning:importlib", # Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758 "ignore:`np.MachAr` is deprecated:DeprecationWarning:numba", - # Filter warnings when parallel=True and the function expectedly can't be parallelized by Numba - "ignore::numba.NumbaPerformanceWarning", ] junit_family = "xunit2" markers = [ From 7e9c8fce7162db85c5225270818f094548d89f67 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 6 Jun 2022 10:23:55 -0700 Subject: [PATCH 4/5] Add Brock's change --- pandas/tests/frame/test_constructors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5360885067c7a..78db4f7ea5c75 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2574,7 +2574,8 @@ def check_views(c_only: bool = False): # FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve # view, so we have to check in the other direction - df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) + with tm.assert_produces_warning(FutureWarning, match="will attempt to set"): + df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) assert df.dtypes.iloc[2] == c.dtype if not copy: check_views(True) From e9714f0bfa5268d365f494cae89170b4ff5d2790 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 6 Jun 2022 11:45:38 -0700 Subject: [PATCH 5/5] Just ignore as numbba not on all builds --- pandas/tests/groupby/aggregate/test_numba.py | 6 ++++-- pandas/tests/groupby/test_numba.py | 3 ++- pandas/tests/groupby/transform/test_numba.py | 6 ++++-- pandas/tests/window/test_numba.py | 6 ++++-- pandas/tests/window/test_online.py | 3 ++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index 64af64651e824..2890b7930611c 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -47,7 +47,8 @@ def incorrect_function(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -75,7 +76,8 @@ def func_numba(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py index c6415708ae3e7..4eb7b6a7b5bea 100644 --- a/pandas/tests/groupby/test_numba.py +++ b/pandas/tests/groupby/test_numba.py @@ -10,7 +10,8 @@ @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEngine: def test_cython_vs_numba_frame( self, sort, nogil, parallel, nopython, numba_supported_reductions diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 214a59d30f5bb..0e26cdc294b55 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -44,7 +44,8 @@ def incorrect_function(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): @@ -72,7 +73,8 @@ def func(values, index): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) def test_cache(jit, pandas_obj, nogil, parallel, nopython): diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 7a50be8fa2c9d..89e00af270a02 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -50,7 +50,8 @@ def arithmetic_numba_supported_operators(request): @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEngine: @pytest.mark.parametrize("jit", [True, False]) def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step): @@ -330,7 +331,8 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba") @pytest.mark.slow -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestTableMethod: def test_table_series_valueerror(self): def f(x): diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 5b53773494b6c..88f462869d8b6 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -24,7 +24,8 @@ @td.skip_if_no("numba") -@pytest.mark.filterwarnings("ignore::numba.NumbaPerformanceWarning") +@pytest.mark.filterwarnings("ignore") +# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEWM: def test_invalid_update(self): df = DataFrame({"a": range(5), "b": range(5)})