@@ -83,9 +83,9 @@ def wrapper(self, *args):
         cols = [arg for arg in args if isinstance(arg, IndexOpsMixin)]
         if all(self._kdf is col._kdf for col in cols):
             # Same DataFrame anchors
-            args = [arg._scol if isinstance(arg, IndexOpsMixin) else arg for arg in args]
-            scol = f(self._scol, *args)
-            scol = booleanize_null(self._scol, scol, f)
+            args = [arg.spark_column if isinstance(arg, IndexOpsMixin) else arg for arg in args]
+            scol = f(self.spark_column, *args)
+            scol = booleanize_null(self.spark_column, scol, f)
 
             return self._with_new_scol(scol)
         else:
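For context, a minimal sketch of the same-anchor fast path above, assuming a working koalas/PySpark session; the kdf name is illustrative:

    import databricks.koalas as ks

    kdf = ks.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
    # Both operands are anchored to the same kdf, so the wrapper simply combines
    # their underlying Spark columns via f(self.spark_column, other.spark_column).
    same_anchor_sum = kdf["a"] + kdf["b"]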
@@ -154,7 +154,13 @@ def __init__(self, internal: _InternalFrame, kdf):
         self._kdf = kdf
 
     @property
-    def _scol(self):
+    def spark_column(self):
+        """
+        Spark Column object representing the Series/Index.
+
+        .. note:: This Spark Column object is strictly tied to the base DataFrame from which
+            the Series/Index was derived.
+        """
         return self._internal.spark_column
 
     # arithmetic operators
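A short usage sketch of the renamed property (assuming a koalas session; names are illustrative):

    import databricks.koalas as ks

    kser = ks.Series([1, 2, 3], name="x")
    scol = kser.spark_column  # a pyspark.sql.Column tied to the DataFrame kser was derived from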
@@ -202,7 +208,7 @@ def mod(left, right):
     def __radd__(self, other):
         # Handle 'literal' + df['col']
         if isinstance(self.spark_type, StringType) and isinstance(other, str):
-            return self._with_new_scol(F.concat(F.lit(other), self._scol))
+            return self._with_new_scol(F.concat(F.lit(other), self.spark_column))
         else:
             return _column_op(spark.Column.__radd__)(self, other)
 
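The string branch of __radd__ can be exercised as in this hedged sketch (assuming a koalas session):

    import databricks.koalas as ks

    kser = ks.Series(["b", "c"])
    # A str literal on the left of '+' goes through F.concat(F.lit("a"), kser.spark_column).
    prefixed = "a" + kser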
@@ -336,8 +342,8 @@ def hasnans(self):
         >>> ks.Series([1, 2, 3]).rename("a").to_frame().set_index("a").index.hasnans
         False
         """
-        sdf = self._internal._sdf.select(self._scol)
-        col = self._scol
+        sdf = self._internal._sdf.select(self.spark_column)
+        col = self.spark_column
 
         ret = sdf.select(F.max(col.isNull() | F.isnan(col))).collect()[0][0]
         return ret
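Roughly the Spark-side check hasnans performs, sketched in plain PySpark (an approximation of the query above, not the internal code path):

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame([(1.0,), (float("nan"),), (None,)], "x double")
    # max over a boolean column is True if any row is NULL or NaN
    has_nans = sdf.select(F.max(F.isnull("x") | F.isnan("x"))).collect()[0][0]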
@@ -517,7 +523,7 @@ def _is_monotonic(self, order):
                     "__partition_id"
                 ),  # Make sure we use the same partition id in the whole job.
                 F.col(NATURAL_ORDER_COLUMN_NAME),
-                self._scol.alias("__origin"),
+                self.spark_column.alias("__origin"),
             )
             .select(
                 F.col("__partition_id"),
@@ -635,7 +641,7 @@ def astype(self, dtype):
         spark_type = as_spark_type(dtype)
         if not spark_type:
             raise ValueError("Type {} not understood".format(dtype))
-        return self._with_new_scol(self._scol.cast(spark_type))
+        return self._with_new_scol(self.spark_column.cast(spark_type))
 
     def isin(self, values):
         """
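For illustration, a hedged usage sketch of astype (assuming a koalas session; the dtype string is one example input):

    import databricks.koalas as ks

    kser = ks.Series([1, 2, 3])
    # The dtype is mapped to a Spark type and applied via spark_column.cast(...)
    kser_double = kser.astype("float64")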
@@ -687,7 +693,7 @@ def isin(self, values):
                 " to isin(), you passed a [{values_type}]".format(values_type=type(values).__name__)
             )
 
-        return self._with_new_scol(self._scol.isin(list(values))).rename(self.name)
+        return self._with_new_scol(self.spark_column.isin(list(values))).rename(self.name)
 
     def isnull(self):
         """
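A minimal isin sketch (assuming a koalas session):

    import databricks.koalas as ks

    kser = ks.Series(["lama", "cow", "falcon"])
    mask = kser.isin(["cow", "lama"])  # built from spark_column.isin([...]), keeping the Series name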
@@ -721,9 +727,11 @@ def isnull(self):
         if isinstance(self, MultiIndex):
             raise NotImplementedError("isna is not defined for MultiIndex")
         if isinstance(self.spark_type, (FloatType, DoubleType)):
-            return self._with_new_scol(self._scol.isNull() | F.isnan(self._scol)).rename(self.name)
+            return self._with_new_scol(
+                self.spark_column.isNull() | F.isnan(self.spark_column)
+            ).rename(self.name)
         else:
-            return self._with_new_scol(self._scol.isNull()).rename(self.name)
+            return self._with_new_scol(self.spark_column.isNull()).rename(self.name)
 
     isna = isnull
 
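A hedged sketch of the float/double branch of isnull (assuming a koalas session):

    import databricks.koalas as ks

    kser = ks.Series([1.0, float("nan"), 3.0])
    mask = kser.isnull()  # for FloatType/DoubleType columns, both NULL and NaN count as missing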
@@ -819,7 +827,7 @@ def all(self, axis: Union[int, str] = 0) -> bool:
         if axis != 0:
             raise NotImplementedError('axis should be either 0 or "index" currently.')
 
-        sdf = self._internal._sdf.select(self._scol)
+        sdf = self._internal._sdf.select(self.spark_column)
         col = scol_for(sdf, sdf.columns[0])
 
         # Note that we're ignoring `None`s here for now.
@@ -882,7 +890,7 @@ def any(self, axis: Union[int, str] = 0) -> bool:
         if axis != 0:
             raise NotImplementedError('axis should be either 0 or "index" currently.')
 
-        sdf = self._internal._sdf.select(self._scol)
+        sdf = self._internal._sdf.select(self.spark_column)
         col = scol_for(sdf, sdf.columns[0])
 
         # Note that we're ignoring `None`s here for now.
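A small sketch of any/all on a boolean Series, covering both of the hunks above (assuming a koalas session); the reduction is evaluated on the Spark side:

    import databricks.koalas as ks

    kser = ks.Series([False, False, True])
    kser.any()  # True
    kser.all()  # False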
@@ -949,7 +957,7 @@ def _shift(self, periods, fill_value, part_cols=()):
         if not isinstance(periods, int):
             raise ValueError("periods should be an int; however, got [%s]" % type(periods))
 
-        col = self._scol
+        col = self.spark_column
         window = (
             Window.partitionBy(*part_cols)
             .orderBy(NATURAL_ORDER_COLUMN_NAME)
@@ -1115,9 +1123,9 @@ def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, d
             raise NotImplementedError("value_counts currently does not support bins")
 
         if dropna:
-            sdf_dropna = self._internal._sdf.select(self._scol).dropna()
+            sdf_dropna = self._internal._sdf.select(self.spark_column).dropna()
         else:
-            sdf_dropna = self._internal._sdf.select(self._scol)
+            sdf_dropna = self._internal._sdf.select(self.spark_column)
         index_name = SPARK_DEFAULT_INDEX_NAME
         column_name = self._internal.data_spark_column_names[0]
         sdf = sdf_dropna.groupby(scol_for(sdf_dropna, column_name).alias(index_name)).count()
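A hedged value_counts sketch (assuming a koalas session):

    import databricks.koalas as ks

    kser = ks.Series(["a", "b", "b", None])
    counts = kser.value_counts()                  # NULLs are dropped before the groupby/count
    counts_all = kser.value_counts(dropna=False)  # keeps a bucket for missing values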
@@ -1207,13 +1215,13 @@ def _nunique(self, dropna=True, approx=False, rsd=0.05):
         colname = self._internal.data_spark_column_names[0]
         count_fn = partial(F.approx_count_distinct, rsd=rsd) if approx else F.countDistinct
         if dropna:
-            return count_fn(self._scol).alias(colname)
+            return count_fn(self.spark_column).alias(colname)
         else:
             return (
-                count_fn(self._scol)
-                + F.when(F.count(F.when(self._scol.isNull(), 1).otherwise(None)) >= 1, 1).otherwise(
-                    0
-                )
+                count_fn(self.spark_column)
+                + F.when(
+                    F.count(F.when(self.spark_column.isNull(), 1).otherwise(None)) >= 1, 1
+                ).otherwise(0)
             ).alias(colname)
 
     def take(self, indices):
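For illustration, the public nunique API built on the internal _nunique helper above might be used as in this hedged sketch (assuming a koalas session and the dropna/approx keywords that _nunique exposes):

    import databricks.koalas as ks

    kser = ks.Series(["a", "a", "b", None])
    kser.nunique()              # 2: countDistinct over non-null values
    kser.nunique(dropna=False)  # 3: adds one when at least one NULL is present
    kser.nunique(approx=True)   # uses approx_count_distinct with the given rsd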