TST: Split / parameterize reshaping tests (#45278)

mroeschke · web-flow · commit b17cedd91264 · 2022-01-10T08:15:37.000-05:00
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -75,7 +75,8 @@ def test_value_vars(self):
         )
         tm.assert_frame_equal(result4, expected4)
 
-    def test_value_vars_types(self):
+    @pytest.mark.parametrize("type_", (tuple, list, np.array))
+    def test_value_vars_types(self, type_):
         # GH 15348
         expected = DataFrame(
             {
@@ -86,10 +87,8 @@ def test_value_vars_types(self):
             },
             columns=["id1", "id2", "variable", "value"],
         )
-
-        for type_ in (tuple, list, np.array):
-            result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B")))
-            tm.assert_frame_equal(result, expected)
+        result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B")))
+        tm.assert_frame_equal(result, expected)
 
     def test_vars_work_with_multiindex(self):
         expected = DataFrame(
@@ -140,23 +139,21 @@ def test_single_vars_work_with_multiindex(
         result = self.df1.melt(id_vars, value_vars, col_level=col_level)
         tm.assert_frame_equal(result, expected)
 
-    def test_tuple_vars_fail_with_multiindex(self):
+    @pytest.mark.parametrize(
+        "id_vars, value_vars",
+        [
+            [("A", "a"), [("B", "b")]],
+            [[("A", "a")], ("B", "b")],
+            [("A", "a"), ("B", "b")],
+        ],
+    )
+    def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars):
         # melt should fail with an informative error message if
         # the columns have a MultiIndex and a tuple is passed
         # for id_vars or value_vars.
-        tuple_a = ("A", "a")
-        list_a = [tuple_a]
-        tuple_b = ("B", "b")
-        list_b = [tuple_b]
-
         msg = r"(id|value)_vars must be a list of tuples when columns are a MultiIndex"
-        for id_vars, value_vars in (
-            (tuple_a, list_b),
-            (list_a, tuple_b),
-            (tuple_a, tuple_b),
-        ):
-            with pytest.raises(ValueError, match=msg):
-                self.df1.melt(id_vars=id_vars, value_vars=value_vars)
+        with pytest.raises(ValueError, match=msg):
+            self.df1.melt(id_vars=id_vars, value_vars=value_vars)
 
     def test_custom_var_name(self):
         result5 = self.df.melt(var_name=self.var_name)
@@ -261,11 +258,10 @@ def test_custom_var_and_value_name(self):
         result20 = df20.melt()
         assert result20.columns.tolist() == ["foo", "value"]
 
-    def test_col_level(self):
-        res1 = self.df1.melt(col_level=0)
-        res2 = self.df1.melt(col_level="CAP")
-        assert res1.columns.tolist() == ["CAP", "value"]
-        assert res2.columns.tolist() == ["CAP", "value"]
+    @pytest.mark.parametrize("col_level", [0, "CAP"])
+    def test_col_level(self, col_level):
+        res = self.df1.melt(col_level=col_level)
+        assert res.columns.tolist() == ["CAP", "value"]
 
     def test_multiindex(self):
         res = self.df1.melt()
@@ -633,7 +629,7 @@ def test_pairs(self):
         tm.assert_frame_equal(result, exp)
 
         with tm.assert_produces_warning(FutureWarning):
-            result = lreshape(df, spec, dropna=False, label="foo")
+            lreshape(df, spec, dropna=False, label="foo")
 
         spec = {
             "visitdt": [f"visitdt{i:d}" for i in range(1, 3)],
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -260,6 +260,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
         # gh-21378
         df = DataFrame(
             {
@@ -493,6 +494,8 @@ def test_pivot_index_with_nan(self, method):
         tm.assert_frame_equal(result, expected)
         tm.assert_frame_equal(df.pivot("b", "a", "c"), expected.T)
 
+    @pytest.mark.parametrize("method", [True, False])
+    def test_pivot_index_with_nan_dates(self, method):
         # GH9491
         df = DataFrame(
             {
@@ -501,8 +504,8 @@ def test_pivot_index_with_nan(self, method):
             }
         )
         df["b"] = df["a"] - pd.Timestamp("2014-02-02")
-        df.loc[1, "a"] = df.loc[3, "a"] = nan
-        df.loc[1, "b"] = df.loc[4, "b"] = nan
+        df.loc[1, "a"] = df.loc[3, "a"] = np.nan
+        df.loc[1, "b"] = df.loc[4, "b"] = np.nan
 
         if method:
             pv = df.pivot("a", "b", "c")
@@ -851,33 +854,31 @@ def test_pivot_with_tuple_of_values(self, method):
             else:
                 pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz"))
 
-    def test_margins(self):
-        def _check_output(
-            result, values_col, index=["A", "B"], columns=["C"], margins_col="All"
-        ):
-            col_margins = result.loc[result.index[:-1], margins_col]
-            expected_col_margins = self.data.groupby(index)[values_col].mean()
-            tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
-            assert col_margins.name == margins_col
-
-            result = result.sort_index()
-            index_margins = result.loc[(margins_col, "")].iloc[:-1]
-
-            expected_ix_margins = self.data.groupby(columns)[values_col].mean()
-            tm.assert_series_equal(
-                index_margins, expected_ix_margins, check_names=False
-            )
-            assert index_margins.name == (margins_col, "")
+    def _check_output(
+        self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All"
+    ):
+        col_margins = result.loc[result.index[:-1], margins_col]
+        expected_col_margins = self.data.groupby(index)[values_col].mean()
+        tm.assert_series_equal(col_margins, expected_col_margins, check_names=False)
+        assert col_margins.name == margins_col
 
-            grand_total_margins = result.loc[(margins_col, ""), margins_col]
-            expected_total_margins = self.data[values_col].mean()
-            assert grand_total_margins == expected_total_margins
+        result = result.sort_index()
+        index_margins = result.loc[(margins_col, "")].iloc[:-1]
 
+        expected_ix_margins = self.data.groupby(columns)[values_col].mean()
+        tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False)
+        assert index_margins.name == (margins_col, "")
+
+        grand_total_margins = result.loc[(margins_col, ""), margins_col]
+        expected_total_margins = self.data[values_col].mean()
+        assert grand_total_margins == expected_total_margins
+
+    def test_margins(self):
         # column specified
         result = self.data.pivot_table(
             values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
         )
-        _check_output(result, "D")
+        self._check_output(result, "D")
 
         # Set a different margins_name (not 'All')
         result = self.data.pivot_table(
@@ -888,15 +889,16 @@ def _check_output(
             aggfunc=np.mean,
             margins_name="Totals",
         )
-        _check_output(result, "D", margins_col="Totals")
+        self._check_output(result, "D", margins_col="Totals")
 
         # no column specified
         table = self.data.pivot_table(
             index=["A", "B"], columns="C", margins=True, aggfunc=np.mean
         )
         for value_col in table.columns.levels[0]:
-            _check_output(table[value_col], value_col)
+            self._check_output(table[value_col], value_col)
 
+    def test_no_col(self):
         # no col
 
         # to help with a buglet
@@ -1353,6 +1355,7 @@ def test_pivot_timegrouper(self, using_array_manager):
                 aggfunc=np.sum,
             )
 
+    def test_pivot_timegrouper_double(self):
         # double grouper
         df = DataFrame(
             {
@@ -1633,7 +1636,8 @@ def test_pivot_dtaccessor(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_daily(self):
+    @pytest.mark.parametrize("i", range(1, 367))
+    def test_daily(self, i):
         rng = date_range("1/1/2000", "12/31/2004", freq="D")
         ts = Series(np.random.randn(len(rng)), index=rng)
 
@@ -1644,28 +1648,27 @@ def test_daily(self):
 
         doy = np.asarray(ts.index.dayofyear)
 
-        for i in range(1, 367):
-            subset = ts[doy == i]
-            subset.index = subset.index.year
+        subset = ts[doy == i]
+        subset.index = subset.index.year
 
-            result = annual[i].dropna()
-            tm.assert_series_equal(result, subset, check_names=False)
-            assert result.name == i
+        result = annual[i].dropna()
+        tm.assert_series_equal(result, subset, check_names=False)
+        assert result.name == i
 
-    def test_monthly(self):
+    @pytest.mark.parametrize("i", range(1, 13))
+    def test_monthly(self, i):
         rng = date_range("1/1/2000", "12/31/2004", freq="M")
         ts = Series(np.random.randn(len(rng)), index=rng)
 
         annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month)
         annual.columns = annual.columns.droplevel(0)
 
         month = ts.index.month
-        for i in range(1, 13):
-            subset = ts[month == i]
-            subset.index = subset.index.year
-            result = annual[i].dropna()
-            tm.assert_series_equal(result, subset, check_names=False)
-            assert result.name == i
+        subset = ts[month == i]
+        subset.index = subset.index.year
+        result = annual[i].dropna()
+        tm.assert_series_equal(result, subset, check_names=False)
+        assert result.name == i
 
     def test_pivot_table_with_iterator_values(self):
         # GH 12017
diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
@@ -13,9 +13,9 @@
 
 
 class TestUnionCategoricals:
-    def test_union_categorical(self):
-        # GH 13361
-        data = [
+    @pytest.mark.parametrize(
+        "a, b, combined",
+        [
             (list("abc"), list("abd"), list("abcabd")),
             ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
             ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
@@ -39,14 +39,16 @@ def test_union_categorical(self):
                 pd.period_range("2014-01-06", "2014-01-07"),
                 pd.period_range("2014-01-01", "2014-01-07"),
             ),
-        ]
-
-        for a, b, combined in data:
-            for box in [Categorical, CategoricalIndex, Series]:
-                result = union_categoricals([box(Categorical(a)), box(Categorical(b))])
-                expected = Categorical(combined)
-                tm.assert_categorical_equal(result, expected)
+        ],
+    )
+    @pytest.mark.parametrize("box", [Categorical, CategoricalIndex, Series])
+    def test_union_categorical(self, a, b, combined, box):
+        # GH 13361
+        result = union_categoricals([box(Categorical(a)), box(Categorical(b))])
+        expected = Categorical(combined)
+        tm.assert_categorical_equal(result, expected)
 
+    def test_union_categorical_ordered_appearance(self):
         # new categories ordered by appearance
         s = Categorical(["x", "y", "z"])
         s2 = Categorical(["a", "b", "c"])
@@ -56,19 +58,22 @@ def test_union_categorical(self):
         )
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categorical_ordered_true(self):
         s = Categorical([0, 1.2, 2], ordered=True)
         s2 = Categorical([0, 1.2, 2], ordered=True)
         result = union_categoricals([s, s2])
         expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categorical_match_types(self):
         # must exactly match types
         s = Categorical([0, 1.2, 2])
         s2 = Categorical([2, 3, 4])
         msg = "dtype of categories must be the same"
         with pytest.raises(TypeError, match=msg):
             union_categoricals([s, s2])
 
+    def test_union_categorical_empty(self):
         msg = "No Categoricals to union"
         with pytest.raises(ValueError, match=msg):
             union_categoricals([])
@@ -117,14 +122,11 @@ def test_union_categoricals_nan(self):
         exp = Categorical([np.nan, np.nan, np.nan, np.nan])
         tm.assert_categorical_equal(res, exp)
 
-    def test_union_categoricals_empty(self):
+    @pytest.mark.parametrize("val", [[], ["1"]])
+    def test_union_categoricals_empty(self, val):
         # GH 13759
-        res = union_categoricals([Categorical([]), Categorical([])])
-        exp = Categorical([])
-        tm.assert_categorical_equal(res, exp)
-
-        res = union_categoricals([Categorical([]), Categorical(["1"])])
-        exp = Categorical(["1"])
+        res = union_categoricals([Categorical([]), Categorical(val)])
+        exp = Categorical(val)
         tm.assert_categorical_equal(res, exp)
 
     def test_union_categorical_same_category(self):
@@ -135,6 +137,7 @@ def test_union_categorical_same_category(self):
         exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4])
         tm.assert_categorical_equal(res, exp)
 
+    def test_union_categorical_same_category_str(self):
         c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"])
         c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"])
         res = union_categoricals([c1, c2])
@@ -293,38 +296,44 @@ def test_union_categoricals_sort_false(self):
         )
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_fastpath(self):
         # fastpath
         c1 = Categorical(["a", "b"], categories=["b", "a", "c"])
         c2 = Categorical(["b", "c"], categories=["b", "a", "c"])
         result = union_categoricals([c1, c2], sort_categories=False)
         expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"])
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_skipresort(self):
         # fastpath - skip resort
         c1 = Categorical(["a", "b"], categories=["a", "b", "c"])
         c2 = Categorical(["b", "c"], categories=["a", "b", "c"])
         result = union_categoricals([c1, c2], sort_categories=False)
         expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_one_nan(self):
         c1 = Categorical(["x", np.nan])
         c2 = Categorical([np.nan, "b"])
         result = union_categoricals([c1, c2], sort_categories=False)
         expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"])
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_only_nan(self):
         c1 = Categorical([np.nan])
         c2 = Categorical([np.nan])
         result = union_categoricals([c1, c2], sort_categories=False)
         expected = Categorical([np.nan, np.nan])
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_empty(self):
         c1 = Categorical([])
         c2 = Categorical([])
         result = union_categoricals([c1, c2], sort_categories=False)
         expected = Categorical([])
         tm.assert_categorical_equal(result, expected)
 
+    def test_union_categoricals_sort_false_ordered_true(self):
         c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True)
         c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True)
         result = union_categoricals([c1, c2], sort_categories=False)
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
@@ -44,17 +44,16 @@ def test_tzaware_retained_categorical(self):
         expected = x.repeat(2)
         tm.assert_index_equal(result1, expected)
 
-    def test_empty(self):
+    @pytest.mark.parametrize("x, y", [[[], []], [[0, 1], []], [[], ["a", "b", "c"]]])
+    def test_empty(self, x, y):
         # product of empty factors
-        X = [[], [0, 1], []]
-        Y = [[], [], ["a", "b", "c"]]
-        for x, y in zip(X, Y):
-            expected1 = np.array([], dtype=np.asarray(x).dtype)
-            expected2 = np.array([], dtype=np.asarray(y).dtype)
-            result1, result2 = cartesian_product([x, y])
-            tm.assert_numpy_array_equal(result1, expected1)
-            tm.assert_numpy_array_equal(result2, expected2)
+        expected1 = np.array([], dtype=np.asarray(x).dtype)
+        expected2 = np.array([], dtype=np.asarray(y).dtype)
+        result1, result2 = cartesian_product([x, y])
+        tm.assert_numpy_array_equal(result1, expected1)
+        tm.assert_numpy_array_equal(result2, expected2)
 
+    def test_empty_input(self):
         # empty product (empty input):
         result = cartesian_product([])
         expected = []