4545
4646from databricks import koalas as ks # For running doctests and reference resolution in PyCharm.
4747from databricks .koalas .base import IndexOpsMixin
48- from databricks .koalas .utils import default_session , name_like_string , scol_for , validate_axis
48+ from databricks .koalas .utils import (
49+ default_session ,
50+ name_like_string ,
51+ scol_for ,
52+ validate_axis ,
53+ align_diff_frames ,
54+ )
4955from databricks .koalas .frame import DataFrame , _reduce_spark_multi
5056from databricks .koalas .internal import _InternalFrame
5157from databricks .koalas .typedef import pandas_wraps
@@ -107,6 +113,9 @@ def from_pandas(pobj: Union["pd.DataFrame", "pd.Series"]) -> Union["Series", "Da
107113 raise ValueError ("Unknown data type: {}" .format (type (pobj )))
108114
109115
116+ _range = range # keep a reference to the built-in range, which the `range` function defined below shadows
117+
118+
110119def range (
111120 start : int , end : Optional [int ] = None , step : int = 1 , num_partitions : Optional [int ] = None
112121) -> DataFrame :
@@ -1539,11 +1548,11 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
15391548 objs : a sequence of Series or DataFrame
15401549 Any None objects will be dropped silently unless
15411550 they are all None in which case a ValueError will be raised
1542- axis : {0/'index'}, default 0
1551+ axis : {0/'index', 1/'columns' }, default 0
15431552 The axis to concatenate along.
15441553 join : {'inner', 'outer'}, default 'outer'
1545- How to handle indexes on other axis(es)
1546- ignore_index : boolean , default False
1554+ How to handle indexes on other axis (or axes).
1555+ ignore_index : bool , default False
15471556 If True, do not use the index values along the concatenation axis. The
15481557 resulting axis will be labeled 0, ..., n - 1. This is useful if you are
15491558 concatenating objects where the concatenation axis does not have
@@ -1552,14 +1561,17 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
15521561
15531562 Returns
15541563 -------
1555- concatenated : object, type of objs
1564+ object, type of objs
15561565 When concatenating all ``Series`` along the index (axis=0), a
15571566 ``Series`` is returned. When ``objs`` contains at least one
1558- ``DataFrame``, a ``DataFrame`` is returned.
1567+ ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
1568+ the columns (axis=1), a ``DataFrame`` is returned.
15591569
15601570 See Also
15611571 --------
1562- DataFrame.merge
1572+ Series.append : Concatenate Series.
1573+ DataFrame.join : Join DataFrames using indexes.
1574+ DataFrame.merge : Merge DataFrames by indexes or columns.
15631575
15641576 Examples
15651577 --------
@@ -1645,6 +1657,25 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16451657 1 b 2
16461658 0 c 3
16471659 1 d 4
1660+
1661+ >>> df4 = ks.DataFrame([['bird', 'polly'], ['monkey', 'george']],
1662+ ... columns=['animal', 'name'])
1663+ >>> ks.concat([df1, df4], axis=1)
1664+ letter number animal name
1665+ 0 a 1 bird polly
1666+ 1 b 2 monkey george
1667+
1668+ The following two DataFrames share the index value ``'a'``. Note that
1669+ ``concat`` here does not accept a ``verify_integrity`` option.
1670+
1671+ >>> df5 = ks.DataFrame([1], index=['a'])
1672+ >>> df5
1673+ 0
1674+ a 1
1675+ >>> df6 = ks.DataFrame([2], index=['a'])
1676+ >>> df6
1677+ 0
1678+ a 2
16481679 """
16491680 if isinstance (objs , (DataFrame , IndexOpsMixin )) or not isinstance (
16501681 objs , Iterable
@@ -1655,10 +1686,6 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16551686 '"{name}"' .format (name = type (objs ).__name__ )
16561687 )
16571688
1658- axis = validate_axis (axis )
1659- if axis != 0 :
1660- raise NotImplementedError ('axis should be either 0 or "index" currently.' )
1661-
16621689 if len (objs ) == 0 :
16631690 raise ValueError ("No objects to concatenate" )
16641691 objs = list (filter (lambda obj : obj is not None , objs ))
@@ -1674,6 +1701,71 @@ def concat(objs, axis=0, join="outer", ignore_index=False):
16741701 "and ks.DataFrame are valid" .format (name = type (objs ).__name__ )
16751702 )
16761703
1704+ axis = validate_axis (axis )
1705+ if axis == 1 :
1706+ if isinstance (objs [0 ], ks .Series ):
1707+ concat_kdf = objs [0 ].to_frame ()
1708+ else :
1709+ concat_kdf = objs [0 ]
1710+
1711+ with ks .option_context ("compute.ops_on_diff_frames" , True ):
1712+
1713+ def assign_columns (kdf , this_column_labels , that_column_labels ):
1714+ # Note that `zip_longest` is used here intentionally so that
1715+ # all columns from both sides are combined.
1716+ for this_label , that_label in itertools .zip_longest (
1717+ this_column_labels , that_column_labels
1718+ ):
1719+ yield (kdf ._kser_for (this_label ), this_label )
1720+ yield (kdf ._kser_for (that_label ), that_label )
1721+
1722+ for kser_or_kdf in objs [1 :]:
1723+ if isinstance (kser_or_kdf , Series ):
1724+ # TODO: there is a corner case to optimize - when the series are from
1725+ # the same DataFrame.
1726+ kser = kser_or_kdf
1727+ # Series in different frames.
1728+ if join == "inner" :
1729+ concat_kdf = align_diff_frames (
1730+ assign_columns ,
1731+ concat_kdf ,
1732+ concat_kdf ._index_normalized_frame (kser ),
1733+ fillna = False ,
1734+ how = "inner" ,
1735+ )
1736+ else :
1737+ concat_kdf = align_diff_frames (
1738+ assign_columns ,
1739+ concat_kdf ,
1740+ concat_kdf ._index_normalized_frame (kser ),
1741+ fillna = False ,
1742+ how = "full" ,
1743+ )
1744+ else :
1745+ kdf = kser_or_kdf
1746+
1747+ if join == "inner" :
1748+ concat_kdf = align_diff_frames (
1749+ assign_columns ,
1750+ concat_kdf ,
1751+ concat_kdf ._index_normalized_frame (kdf ),
1752+ fillna = False ,
1753+ how = "inner" ,
1754+ )
1755+ else :
1756+ concat_kdf = align_diff_frames (
1757+ assign_columns ,
1758+ concat_kdf ,
1759+ concat_kdf ._index_normalized_frame (kdf ),
1760+ fillna = False ,
1761+ how = "full" ,
1762+ )
1763+
1764+ if ignore_index :
1765+ concat_kdf .columns = list (map (str , _range (len (concat_kdf .columns ))))
1766+
1767+ return concat_kdf
1768+
16771769 # Series, Series ...
16781770 # We should return Series if objects are all Series.
16791771 should_return_series = all (map (lambda obj : isinstance (obj , Series ), objs ))
0 commit comments