@@ -4721,9 +4721,9 @@ def combine_first(self, other) -> "Series":
47214721 * index_scols , cond .alias (self ._internal .data_spark_column_names [0 ])
47224722 ).distinct ()
47234723 internal = self ._internal .with_new_sdf (sdf )
4724- return first_series (ks . DataFrame (internal ))
4724+ return first_series (DataFrame (internal ))
47254725
4726- def dot (self , other ) -> Union [Scalar , "Series" ]:
4726+ def dot (self , other : Union [ "Series" , DataFrame ] ) -> Union [Scalar , "Series" ]:
47274727 """
47284728 Compute the dot product between the Series and the columns of other.
47294729
@@ -4732,7 +4732,7 @@ def dot(self, other) -> Union[Scalar, "Series"]:
47324732
47334733 It can also be called using `self @ other` in Python >= 3.5.
47344734
4735- .. note:: This API is slightly different from pandas when indexes from both
4735+ .. note:: This API is slightly different from pandas when indexes from both Series
47364736 are not aligned. Matching pandas' behavior would require reading the whole data,
47374737 for example, to count the rows. pandas raises an exception; however, Koalas just proceeds
47384738 and computes the result, permissively ignoring mismatches by treating them as NaN.
@@ -4774,20 +4774,48 @@ def dot(self, other) -> Union[Scalar, "Series"]:
47744774
47754775 >>> s @ s
47764776 14
4777+
4778+ >>> kdf = ks.DataFrame({'x': [0, 1, 2, 3], 'y': [0, -1, -2, -3]})
4779+ >>> kdf
4780+ x y
4781+ 0 0 0
4782+ 1 1 -1
4783+ 2 2 -2
4784+ 3 3 -3
4785+
4786+ >>> with ks.option_context("compute.ops_on_diff_frames", True):
4787+ ... s.dot(kdf)
4788+ ...
4789+ x 14
4790+ y -14
4791+ dtype: int64
47774792 """
47784793 if isinstance (other , DataFrame ):
4779- raise ValueError (
4780- "Series.dot() is currently not supported with DataFrame since "
4781- "it will cause expansive calculation as many as the number "
4782- "of columns of DataFrame"
4783- )
4784- if self ._kdf is not other ._kdf :
4785- if len (self .index ) != len (other .index ):
4786- raise ValueError ("matrices are not aligned" )
4787- if isinstance (other , Series ):
4788- result = (self * other ).sum ()
4794+ if not same_anchor (self , other ):
4795+ if not self .index .sort_values ().equals (other .index .sort_values ()):
4796+ raise ValueError ("matrices are not aligned" )
47894797
4790- return result
4798+ other = other .copy ()
4799+ column_labels = other ._internal .column_labels
4800+
4801+ self_column_label = verify_temp_column_name (other , "__self_column__" )
4802+ other [self_column_label ] = self
4803+ self_kser = other ._kser_for (self_column_label )
4804+
4805+ product_ksers = [other ._kser_for (label ) * self_kser for label in column_labels ]
4806+
4807+ dot_product_kser = DataFrame (
4808+ other ._internal .with_new_columns (product_ksers , column_labels )
4809+ ).sum ()
4810+
4811+ return cast (Series , dot_product_kser ).rename (self .name )
4812+
4813+ else :
4814+ assert isinstance (other , Series )
4815+ if not same_anchor (self , other ):
4816+ if len (self .index ) != len (other .index ):
4817+ raise ValueError ("matrices are not aligned" )
4818+ return (self * other ).sum ()
47914819
47924820 def __matmul__ (self , other ):
47934821 """
@@ -4945,7 +4973,7 @@ def asof(self, where) -> Union[Scalar, "Series"]:
49454973 should_return_series = True
49464974 if isinstance (self .index , ks .MultiIndex ):
49474975 raise ValueError ("asof is not supported for a MultiIndex" )
4948- if isinstance (where , (ks .Index , ks .Series , ks . DataFrame )):
4976+ if isinstance (where , (ks .Index , ks .Series , DataFrame )):
49494977 raise ValueError ("where cannot be an Index, Series or a DataFrame" )
49504978 if not self .index .is_monotonic_increasing :
49514979 raise ValueError ("asof requires a sorted index" )
0 commit comments