@@ -1462,13 +1462,14 @@ def test_merge_readonly(self):
1462
1462
1463
1463
def _check_merge(x, y):
    """Assert that ``x.join(y)`` agrees with ``merge`` on the reset indexes.

    Every combination of ``how`` in ("inner", "left", "outer") and ``sort`` in
    (True, False) is checked: the joined frame must equal the merge of
    ``x.reset_index()`` and ``y.reset_index()`` once that merge result is
    re-indexed by its ``"index"`` column.

    Parameters
    ----------
    x, y : DataFrame
        Frames to join on their indexes.
        NOTE(review): presumably both indexes are unnamed, so that
        ``reset_index()`` produces an ``"index"`` column -- confirm at callers.
    """
    # Same iteration order as a nested loop: ``how`` outermost, ``sort`` innermost.
    combos = [
        (how, sort) for how in ("inner", "left", "outer") for sort in (True, False)
    ]
    for how, sort in combos:
        joined = x.join(y, how=how, sort=sort)

        merged = merge(x.reset_index(), y.reset_index(), how=how, sort=sort)
        merged = merged.set_index("index")

        # TODO check_names on merge?
        tm.assert_frame_equal(joined, merged, check_names=False)
1472
1473
1473
1474
1474
1475
class TestMergeDtypes :
@@ -1751,7 +1752,7 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype):
1751
1752
"how, expected_data" ,
1752
1753
[
1753
1754
("inner" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
1754
- ("outer" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
1755
+ ("outer" , [[False , 5 , 3 ], [True , 1 , 4 ]]),
1755
1756
("left" , [[True , 1 , 4 ], [False , 5 , 3 ]]),
1756
1757
("right" , [[False , 5 , 3 ], [True , 1 , 4 ]]),
1757
1758
],
@@ -2331,9 +2332,9 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):
2331
2332
"outer" ,
2332
2333
DataFrame (
2333
2334
{
2334
- "A" : [100 , 200 , 1 , 300 ],
2335
- "B1" : [60 , 70 , 80 , np .nan ],
2336
- "B2" : [600 , 700 , np . nan , 800 ],
2335
+ "A" : [1 , 100 , 200 , 300 ],
2336
+ "B1" : [80 , 60 , 70 , np .nan ],
2337
+ "B2" : [np . nan , 600 , 700 , 800 ],
2337
2338
}
2338
2339
),
2339
2340
),
@@ -2752,9 +2753,9 @@ def test_merge_outer_with_NaN(dtype):
2752
2753
result = merge (right , left , on = "key" , how = "outer" )
2753
2754
expected = DataFrame (
2754
2755
{
2755
- "key" : [np .nan , np .nan , 1 , 2 ],
2756
- "col2" : [3 , 4 , np .nan , np .nan ],
2757
- "col1" : [np .nan , np .nan , 1 , 2 ],
2756
+ "key" : [1 , 2 , np .nan , np .nan ],
2757
+ "col2" : [np .nan , np .nan , 3 , 4 ],
2758
+ "col1" : [1 , 2 , np .nan , np .nan ],
2758
2759
},
2759
2760
dtype = dtype ,
2760
2761
)
@@ -2847,3 +2848,79 @@ def test_merge_multiindex_single_level():
2847
2848
2848
2849
result = df .merge (df2 , left_on = ["col" ], right_index = True , how = "left" )
2849
2850
tm .assert_frame_equal (result , expected )
2851
+
2852
+
2853
@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("on_index", [True, False])
@pytest.mark.parametrize("left_unique", [True, False])
@pytest.mark.parametrize("left_monotonic", [True, False])
@pytest.mark.parametrize("right_unique", [True, False])
@pytest.mark.parametrize("right_monotonic", [True, False])
def test_merge_combinations(
    how, sort, on_index, left_unique, left_monotonic, right_unique, right_monotonic
):
    """Compare ``merge`` output with an independently constructed expectation.

    Each side is a single-column ``"key"`` frame whose three values are chosen
    to be unique or duplicated and monotonic or not. The merge is performed
    either on the ``"key"`` column or on a ``"key"`` index, for every ``how``
    and ``sort`` combination, and the result is compared with a frame built
    from the key values directly.
    """
    # GH 54611

    def _make_keys(unique, monotonic):
        # Start from [2, 3]; the third value decides uniqueness/monotonicity.
        if unique:
            tail = 4 if monotonic else 1
        else:
            tail = 3 if monotonic else 2
        return [2, 3, tail]

    left = DataFrame({"key": _make_keys(left_unique, left_monotonic)})
    right = DataFrame({"key": _make_keys(right_unique, right_monotonic)})

    if on_index:
        # Merge on the index; ``left``/``right`` keep "key" as a column for the
        # expectation logic below.
        result = merge(
            left.set_index("key"),
            right.set_index("key"),
            how=how,
            sort=sort,
            left_index=True,
            right_index=True,
        )
    else:
        result = merge(left, right, how=how, sort=sort, on="key")

    if how == "outer":
        if on_index and left_unique and left["key"].equals(right["key"]):
            # Identical unique keys joined on the index: expected is the left
            # keys in their existing order.
            expected = DataFrame({"key": left["key"]})
        else:
            # Each key appears (left count * right count) times, a key missing
            # on one side counting as 1 there; rows are ordered by key.
            counts = left["key"].value_counts().mul(
                right["key"].value_counts(), fill_value=1
            )
            counts = counts.astype(np.intp)
            keys = counts.index.values.repeat(counts.values)
            expected = DataFrame({"key": keys}).sort_values("key")
    elif how in ("left", "right", "inner"):
        # The expectation starts from the right frame for how="right" and from
        # the left frame for "left" and "inner".
        if how == "right":
            expected, other, other_unique = right, left, left_unique
        else:
            expected, other, other_unique = left, right, right_unique

        if how == "inner":
            # Keep only keys present on both sides.
            shared = set(left["key"].values).intersection(right["key"].values)
            expected = expected[expected["key"].isin(shared)]

        if sort:
            expected = expected.sort_values("key")

        if not other_unique:
            # Repeat each row once per occurrence of its key on the other side;
            # keys absent there keep a single row.
            repeats = other["key"].value_counts()
            repeats = repeats.reindex(expected["key"].values, fill_value=1)
            repeats = repeats.astype(np.intp)
            expected = expected["key"].repeat(repeats.values).to_frame()

    expected = (
        expected.set_index("key") if on_index else expected.reset_index(drop=True)
    )

    tm.assert_frame_equal(result, expected)
0 commit comments