4545
4646from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
4747from databricks .koalas .base import IndexOpsMixin
48- from databricks .koalas .utils import default_session , name_like_string , scol_for , validate_axis
48+ from databricks .koalas .utils import (
49+ default_session ,
50+ name_like_string ,
51+ scol_for ,
52+ validate_axis ,
53+ align_diff_frames ,
54+ )
4955from databricks .koalas .frame import DataFrame , _reduce_spark_multi
5056from databricks .koalas .internal import _InternalFrame
5157from databricks .koalas .typedef import pandas_wraps
@@ -107,6 +113,9 @@ def from_pandas(pobj: Union["pd.DataFrame", "pd.Series"]) -> Union["Series", "Da
107113 raise ValueError ("Unknown data type: {}" .format (type (pobj )))
108114
109115
116+ _range = range # Alias for the built-in range, which the module-level `range` function defined below shadows.
117+
118+
110119def range (
111120 start : int , end : Optional [int ] = None , step : int = 1 , num_partitions : Optional [int ] = None
112121) -> DataFrame :
@@ -1539,11 +1548,11 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
15391548 objs : a sequence of Series or DataFrame
15401549 Any None objects will be dropped silently unless
15411550 they are all None in which case a ValueError will be raised
1542- axis : {0/'index'}, default 0
1551+ axis : {0/'index', 1/'columns' }, default 0
15431552 The axis to concatenate along.
15441553 join : {'inner', 'outer'}, default 'outer'
1545- How to handle indexes on other axis(es)
1546- ignore_index : boolean , default False
1554+ How to handle indexes on other axis (or axes).
1555+ ignore_index : bool , default False
15471556 If True, do not use the index values along the concatenation axis. The
15481557 resulting axis will be labeled 0, ..., n - 1. This is useful if you are
15491558 concatenating objects where the concatenation axis does not have
@@ -1552,14 +1561,17 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
15521561
15531562 Returns
15541563 -------
1555- concatenated : object, type of objs
1564+ object, type of objs
15561565 When concatenating all ``Series`` along the index (axis=0), a
15571566 ``Series`` is returned. When ``objs`` contains at least one
1558- ``DataFrame``, a ``DataFrame`` is returned.
1567+ ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
1568+ the columns (axis=1), a ``DataFrame`` is returned.
15591569
15601570 See Also
15611571 --------
1562- DataFrame.merge
1572+ Series.append : Concatenate Series.
1573+ DataFrame.join : Join DataFrames using indexes.
1574+ DataFrame.merge : Merge DataFrames by indexes or columns.
15631575
15641576 Examples
15651577 --------
@@ -1645,6 +1657,17 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16451657 1 b 2
16461658 0 c 3
16471659 1 d 4
1660+
1661+ >>> df4 = ks.DataFrame([['bird', 'polly'], ['monkey', 'george']],
1662+ ... columns=['animal', 'name'])
1663+
1664+ Combine along the column axis.
1665+
1666+ >>> ks.concat([df1, df4], axis=1)
1667+ letter number animal name
1668+ 0 a 1 bird polly
1669+ 1 b 2 monkey george
1670+
16481671 """
16491672 if isinstance (objs , (DataFrame , IndexOpsMixin )) or not isinstance (
16501673 objs , Iterable
@@ -1655,10 +1678,6 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16551678 '"{name}"' .format (name = type (objs ).__name__ )
16561679 )
16571680
1658- axis = validate_axis (axis )
1659- if axis != 0 :
1660- raise NotImplementedError ('axis should be either 0 or "index" currently.' )
1661-
16621681 if len (objs ) == 0 :
16631682 raise ValueError ("No objects to concatenate" )
16641683 objs = list (filter (lambda obj : obj is not None , objs ))
@@ -1674,6 +1693,79 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16741693 "and ks.DataFrame are valid" .format (name = type (objs ).__name__ )
16751694 )
16761695
1696+ axis = validate_axis (axis )
1697+ if axis == 1 :
1698+ if isinstance (objs [0 ], ks .Series ):
1699+ concat_kdf = objs [0 ].to_frame ()
1700+ else :
1701+ concat_kdf = objs [0 ]
1702+
1703+ with ks .option_context ("compute.ops_on_diff_frames" , True ):
1704+
1705+ def assign_columns (kdf , this_column_labels , that_column_labels ):
1706+ # Note that `zip_longest` is used here intentionally so that the columns
1707+ # from both sides are all combined.
1708+ for this_label , that_label in itertools .zip_longest (
1709+ this_column_labels , that_column_labels
1710+ ):
1711+ yield (kdf ._kser_for (this_label ), this_label )
1712+ yield (kdf ._kser_for (that_label ), that_label )
1713+
1714+ for kser_or_kdf in objs [1 :]:
1715+ if isinstance (kser_or_kdf , Series ):
1716+ # TODO: there is a corner case to optimize - when the series are from
1717+ # the same DataFrame.
1718+ kser = kser_or_kdf
1719+ # Series in different frames.
1720+ if join == "inner" :
1721+ concat_kdf = align_diff_frames (
1722+ assign_columns ,
1723+ concat_kdf ,
1724+ concat_kdf ._index_normalized_frame (kser ),
1725+ fillna = False ,
1726+ how = "inner" ,
1727+ )
1728+ elif join == "outer" :
1729+ concat_kdf = align_diff_frames (
1730+ assign_columns ,
1731+ concat_kdf ,
1732+ concat_kdf ._index_normalized_frame (kser ),
1733+ fillna = False ,
1734+ how = "full" ,
1735+ )
1736+ else :
1737+ raise ValueError (
1738+ "Only can inner (intersect) or outer (union) join the other axis."
1739+ )
1740+ else :
1741+ kdf = kser_or_kdf
1742+
1743+ if join == "inner" :
1744+ concat_kdf = align_diff_frames (
1745+ assign_columns ,
1746+ concat_kdf ,
1747+ concat_kdf ._index_normalized_frame (kdf ),
1748+ fillna = False ,
1749+ how = "inner" ,
1750+ )
1751+ elif join == "outer" :
1752+ concat_kdf = align_diff_frames (
1753+ assign_columns ,
1754+ concat_kdf ,
1755+ concat_kdf ._index_normalized_frame (kdf ),
1756+ fillna = False ,
1757+ how = "full" ,
1758+ )
1759+ else :
1760+ raise ValueError (
1761+ "Only can inner (intersect) or outer (union) join the other axis."
1762+ )
1763+
1764+ if ignore_index :
1765+ concat_kdf .columns = list (map (str , _range (len (concat_kdf .columns ))))
1766+
1767+ return concat_kdf
1768+
16771769 # Series, Series ...
16781770 # We should return Series if objects are all Series.
16791771 should_return_series = all (map (lambda obj : isinstance (obj , Series ), objs ))
0 commit comments