 str_type = str
 
 
-class SparkMethods(object):
-    pass
-
-
 class Series(Frame, IndexOpsMixin, Generic[T]):
     """
     Koalas Series that corresponds to Pandas Series logically. This holds Spark Column
@@ -399,11 +395,11 @@ def axes(self):
     @property
     def spark_type(self):
         warnings.warn(
-            "Series.spark_type is deprecated as of Series.spark.type. "
+            "Series.spark_type is deprecated as of Series.spark.data_type. "
             "Please use the API instead.",
             FutureWarning,
         )
-        return self.spark.type
+        return self.spark.data_type
 
     spark_type.__doc__ = SparkIndexOpsMethods.type.__doc__
 
@@ -924,7 +920,7 @@ def map(self, arg):
         if isinstance(arg, dict):
             is_start = True
             # In case dictionary is empty.
-            current = F.when(F.lit(False), F.lit(None).cast(self.spark.type))
+            current = F.when(F.lit(False), F.lit(None).cast(self.spark.data_type))
 
             for to_replace, value in arg.items():
                 if is_start:
@@ -938,7 +934,7 @@ def map(self, arg):
                 del arg[np._NoValue]  # Remove in case it's set in defaultdict.
                 current = current.otherwise(F.lit(tmp_val))
             else:
-                current = current.otherwise(F.lit(None).cast(self.spark.type))
+                current = current.otherwise(F.lit(None).cast(self.spark.data_type))
             return self._with_new_scol(current).rename(self.name)
         else:
             return self.apply(arg)
@@ -980,11 +976,11 @@ def astype(self, dtype) -> "Series":
         if not spark_type:
             raise ValueError("Type {} not understood".format(dtype))
         if isinstance(spark_type, BooleanType):
-            if isinstance(self.spark.type, StringType):
+            if isinstance(self.spark.data_type, StringType):
                 scol = F.when(self.spark.column.isNull(), F.lit(False)).otherwise(
                     F.length(self.spark.column) > 0
                 )
-            elif isinstance(self.spark.type, (FloatType, DoubleType)):
+            elif isinstance(self.spark.data_type, (FloatType, DoubleType)):
                 scol = F.when(
                     self.spark.column.isNull() | F.isnan(self.spark.column), F.lit(True)
                 ).otherwise(self.spark.column.cast(spark_type))
@@ -1745,7 +1741,7 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
         if lower is None and upper is None:
             return self
 
-        if isinstance(self.spark.type, NumericType):
+        if isinstance(self.spark.data_type, NumericType):
             scol = self.spark.column
             if lower is not None:
                 scol = F.when(scol < lower, lower).otherwise(scol)
@@ -2714,7 +2710,7 @@ def apply(self, func, args=(), **kwds):
             pser = self.head(limit)._to_internal_pandas()
             transformed = pser.apply(func, *args, **kwds)
             kser = Series(transformed)
-            return self._transform_batch(apply_each, kser.spark.type)
+            return self._transform_batch(apply_each, kser.spark.data_type)
         else:
             sig_return = infer_return_type(func)
             if not isinstance(sig_return, ScalarType):
@@ -3021,7 +3017,7 @@ def _transform_batch(self, func, return_schema):
             pser = self.head(limit)._to_internal_pandas()
             transformed = pser.transform(func)
             kser = Series(transformed)
-            spark_return_type = kser.spark.type
+            spark_return_type = kser.spark.data_type
         else:
             spark_return_type = return_schema
 
@@ -4987,7 +4983,7 @@ def _cumprod(self, skipna, part_cols=()):
         from pyspark.sql.functions import pandas_udf
 
         def cumprod(scol):
-            @pandas_udf(returnType=self.spark.type)
+            @pandas_udf(returnType=self.spark.data_type)
             def negative_check(s):
                 assert len(s) == 0 or ((s > 0) | (s.isnull())).all(), (
                     "values should be bigger than 0: %s" % s
@@ -5029,7 +5025,7 @@ def _reduce_for_stat_function(self, sfun, name, axis=None, numeric_only=None):
             raise ValueError("Series does not support columns axis.")
         num_args = len(signature(sfun).parameters)
         col_sdf = self.spark.column
-        col_type = self.spark.type
+        col_type = self.spark.data_type
         if isinstance(col_type, BooleanType) and sfun.__name__ not in ("min", "max"):
             # Stat functions cannot be used with boolean values by default
             # Thus, cast to integer (true to 1 and false to 0)
@@ -5050,7 +5046,8 @@ def __len__(self):
     def __getitem__(self, key):
         try:
             if (isinstance(key, slice) and any(type(n) == int for n in [key.start, key.stop])) or (
-                type(key) == int and not isinstance(self.index.spark.type, (IntegerType, LongType))
+                type(key) == int
+                and not isinstance(self.index.spark.data_type, (IntegerType, LongType))
             ):
                 # Seems like pandas Series always uses int as positional search when slicing
                 # with ints, searches based on index values when the value is int.
@@ -5104,10 +5101,10 @@ def __repr__(self):
         return pser.to_string(name=self.name, dtype=self.dtype)
 
     def __dir__(self):
-        if not isinstance(self.spark.type, StructType):
+        if not isinstance(self.spark.data_type, StructType):
             fields = []
         else:
-            fields = [f for f in self.spark.type.fieldNames() if " " not in f]
+            fields = [f for f in self.spark.data_type.fieldNames() if " " not in f]
         return super(Series, self).__dir__() + fields
 
     def __iter__(self):
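
For context, a minimal sketch of how this rename looks from the user side, assuming a working koalas installation; the import alias and sample values below are illustrative only, not part of the change:

import databricks.koalas as ks

kser = ks.Series([1, 2, 3])

# Accessor used throughout the diff above: returns the underlying Spark DataType.
print(kser.spark.data_type)  # e.g. LongType

# The old Series.spark_type shortcut still works, but per the deprecation hunk
# it now emits a FutureWarning pointing at the accessor above.
print(kser.spark_type)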