@@ -1447,6 +1447,124 @@ def test_hasnans(self):
14471447 kser = ks .from_pandas (pser )
14481448 self .assert_eq (pser .hasnans , kser .hasnans )
14491449
def test_intersection(self):
    """Compare Koalas ``intersection`` against pandas for Index and MultiIndex.

    Every comparison has the same shape: the pandas result of
    ``left.intersection(other)`` is checked against the Koalas result of the
    same call after ``sort_values()``. The ``other`` operands cover Index,
    MultiIndex, Series, list, tuple and dict inputs. The final section checks
    the exceptions raised for unsupported operands.
    """
    pidx = pd.Index([1, 2, 3, 4], name="Koalas")
    kidx = ks.from_pandas(pidx)

    def compare(p_left, k_left, p_other, k_other, **assert_kwargs):
        # The Koalas side is sorted before the comparison; presumably its
        # result order is not guaranteed to match pandas -- TODO confirm.
        expected = p_left.intersection(p_other)
        actual = k_left.intersection(k_other).sort_values()
        self.assert_eq(expected, actual, **assert_kwargs)

    def compare_index(p_other, k_other, **assert_kwargs):
        # Each operand is checked against the anchored index and against an
        # index derived from it through arithmetic (``+ 1``).
        compare(pidx, kidx, p_other, k_other, **assert_kwargs)
        compare(pidx + 1, kidx + 1, p_other, k_other, **assert_kwargs)

    # other = Index
    pidx_other = pd.Index([3, 4, 5, 6], name="Koalas")
    compare_index(pidx_other, ks.from_pandas(pidx_other))

    pidx_other_different_name = pd.Index([3, 4, 5, 6], name="Databricks")
    compare_index(pidx_other_different_name, ks.from_pandas(pidx_other_different_name))

    pidx_other_from_frame = pd.DataFrame({"a": [3, 4, 5, 6]}).set_index("a").index
    kidx_other_from_frame = ks.from_pandas(pidx_other_from_frame)
    compare_index(pidx_other_from_frame, kidx_other_from_frame)

    # other = MultiIndex
    pmidx = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
    kmidx = ks.from_pandas(pmidx)
    compare_index(pmidx, kmidx, almost=True)

    # other = Series (unnamed, then named differently from the index)
    pser = pd.Series([3, 4, 5, 6])
    compare_index(pser, ks.from_pandas(pser))

    pser_different_name = pd.Series([3, 4, 5, 6], name="Databricks")
    compare_index(pser_different_name, ks.from_pandas(pser_different_name))

    # other = list, tuple, dict: the same plain Python object goes to both sides
    plain_others = (
        [3, 4, 5, 6],
        (3, 4, 5, 6),
        {3: None, 4: None, 5: None, 6: None},
    )
    for other in plain_others:
        compare_index(other, other)

    # MultiIndex / other = Index
    compare(pmidx, kmidx, pidx, kidx, almost=True)
    compare(pmidx, kmidx, pidx_other_from_frame, kidx_other_from_frame, almost=True)

    # MultiIndex / other = MultiIndex
    pmidx_other = pd.MultiIndex.from_tuples([("c", "z"), ("d", "w")])
    compare(pmidx, kmidx, pmidx_other, ks.from_pandas(pmidx_other))

    # MultiIndex / other = list, tuple, dict of tuples
    tuple_others = (
        [("c", "z"), ("d", "w")],
        (("c", "z"), ("d", "w")),
        {("c", "z"): None, ("d", "w"): None},
    )
    for other in tuple_others:
        compare(pmidx, kmidx, other, other)

    # Unsupported operands. Each operand is built by a factory so that it is
    # constructed inside the ``assertRaisesRegex`` context.
    def make_series():
        return ks.Series([3, 4, 5, 6])

    def make_frame():
        return ks.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

    invalid_cases = [
        (kidx, lambda: 4, TypeError, "Input must be Index or array-like"),
        (kmidx, lambda: 4, TypeError, "other must be a MultiIndex or a list of tuples"),
        (kmidx, make_series, TypeError, "other must be a MultiIndex or a list of tuples"),
        (kidx, make_frame, ValueError, "Index data must be 1-dimensional"),
        (kmidx, make_frame, ValueError, "Index data must be 1-dimensional"),
    ]
    for target, make_other, error_type, pattern in invalid_cases:
        with self.assertRaisesRegex(error_type, pattern):
            target.intersection(make_other())
1567+
14501568 def test_item (self ):
14511569 pidx = pd .Index ([10 ])
14521570 kidx = ks .from_pandas (pidx )
0 commit comments