@@ -567,7 +567,7 @@ def mul10(x) -> int:
567567
568568 def test_dot_in_column_name (self ):
569569 self .assert_eq (
570- ks .DataFrame (ks .range (1 )._internal .spark_frame .selectExpr ("1 as `a.b`" ))["a.b" ],
570+ ks .DataFrame (ks .range (1 )._internal .spark_frame .selectExpr ("1L as `a.b`" ))["a.b" ],
571571 ks .Series ([1 ], name = "a.b" ),
572572 )
573573
@@ -665,7 +665,7 @@ def _test_dropna(self, pdf, axis):
665665 pdf2 .dropna (inplace = True )
666666 kdf2 .dropna (inplace = True )
667667 self .assert_eq (kdf2 , pdf2 )
668- self .assert_eq (kser , pser , almost = True )
668+ self .assert_eq (kser , pser )
669669
670670 # multi-index
671671 columns = pd .MultiIndex .from_tuples ([("a" , "x" ), ("a" , "y" ), ("b" , "z" )])
@@ -805,7 +805,7 @@ def test_fillna(self):
805805 pdf .fillna ({"x" : - 1 , "y" : - 2 , "z" : - 5 }, inplace = True )
806806 kdf .fillna ({"x" : - 1 , "y" : - 2 , "z" : - 5 }, inplace = True )
807807 self .assert_eq (kdf , pdf )
808- self .assert_eq (kser , pser , almost = True )
808+ self .assert_eq (kser , pser )
809809
810810 s_nan = pd .Series ([- 1 , - 2 , - 5 ], index = ["x" , "y" , "z" ], dtype = int )
811811 self .assert_eq (kdf .fillna (s_nan ), pdf .fillna (s_nan ))
@@ -942,7 +942,7 @@ def test_sort_values(self):
942942 kserA = kdf .a
943943 self .assert_eq (kdf .sort_values ("b" , inplace = True ), pdf .sort_values ("b" , inplace = True ))
944944 self .assert_eq (kdf , pdf )
945- self .assert_eq (kserA , pserA , almost = True )
945+ self .assert_eq (kserA , pserA )
946946
947947 columns = pd .MultiIndex .from_tuples ([("X" , "A" ), ("X" , "B" )])
948948 kdf .columns = columns
@@ -975,7 +975,7 @@ def test_sort_index(self):
975975 kserA = kdf .A
976976 self .assertEqual (kdf .sort_index (inplace = True ), pdf .sort_index (inplace = True ))
977977 self .assert_eq (kdf , pdf )
978- self .assert_eq (kserA , pserA , almost = True )
978+ self .assert_eq (kserA , pserA )
979979
980980 # Assert multi-indices
981981 pdf = pd .DataFrame (
@@ -1759,7 +1759,7 @@ def get_data(left_columns=None, right_columns=None):
17591759 left_pdf .update (right_pdf )
17601760 left_kdf .update (right_kdf )
17611761 self .assert_eq (left_pdf .sort_values (by = ["A" , "B" ]), left_kdf .sort_values (by = ["A" , "B" ]))
1762- self .assert_eq (kser .sort_index (), pser .sort_index (), almost = True )
1762+ self .assert_eq (kser .sort_index (), pser .sort_index ())
17631763
17641764 left_kdf , left_pdf , right_kdf , right_pdf = get_data ()
17651765 left_pdf .update (right_pdf , overwrite = False )
@@ -2063,7 +2063,7 @@ def test_stack(self):
20632063 )
20642064 kdf = ks .from_pandas (pdf )
20652065
2066- self .assert_eq (kdf .stack ().sort_index (), pdf .stack ().sort_index (), almost = True )
2066+ self .assert_eq (kdf .stack ().sort_index (), pdf .stack ().sort_index ())
20672067 self .assert_eq (kdf [[]].stack ().sort_index (), pdf [[]].stack ().sort_index (), almost = True )
20682068
20692069 def test_unstack (self ):
@@ -3362,10 +3362,10 @@ def test_query(self):
33623362 kdf .query ("('A', 'Z') > ('B', 'X')" )
33633363
33643364 def test_take (self ):
3365- kdf = ks .DataFrame (
3365+ pdf = pd .DataFrame (
33663366 {"A" : range (0 , 50000 ), "B" : range (100000 , 0 , - 2 ), "C" : range (100000 , 50000 , - 1 )}
33673367 )
3368- pdf = kdf . to_pandas ( )
3368+ kdf = ks . from_pandas ( pdf )
33693369
33703370 # axis=0 (default)
33713371 self .assert_eq (kdf .take ([1 , 2 ]).sort_index (), pdf .take ([1 , 2 ]).sort_index ())
@@ -3438,6 +3438,7 @@ def test_take(self):
34383438 self .assert_eq (
34393439 kdf .take (range (- 1 , - 3 ), axis = 1 ).sort_index (),
34403440 pdf .take (range (- 1 , - 3 ), axis = 1 ).sort_index (),
3441+ almost = True ,
34413442 )
34423443 self .assert_eq (
34433444 kdf .take ([2 , 1 ], axis = 1 ).sort_index (), pdf .take ([2 , 1 ], axis = 1 ).sort_index (),
@@ -3555,7 +3556,7 @@ def test_squeeze(self):
35553556 axises = [None , 0 , 1 , "rows" , "index" , "columns" ]
35563557
35573558 # Multiple columns
3558- pdf = pd .DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ["a" , "b" ])
3559+ pdf = pd .DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ["a" , "b" ], index = [ "x" , "y" ] )
35593560 kdf = ks .from_pandas (pdf )
35603561 for axis in axises :
35613562 self .assert_eq (pdf .squeeze (axis ), kdf .squeeze (axis ))
@@ -3567,7 +3568,7 @@ def test_squeeze(self):
35673568 self .assert_eq (pdf .squeeze (axis ), kdf .squeeze (axis ))
35683569
35693570 # Single column with single value
3570- pdf = pd .DataFrame ([[1 ]], columns = ["a" ])
3571+ pdf = pd .DataFrame ([[1 ]], columns = ["a" ], index = [ "x" ] )
35713572 kdf = ks .from_pandas (pdf )
35723573 for axis in axises :
35733574 self .assert_eq (pdf .squeeze (axis ), kdf .squeeze (axis ))
@@ -3880,15 +3881,15 @@ def test_iteritems(self):
38803881
38813882 def test_tail (self ):
38823883 if LooseVersion (pyspark .__version__ ) >= LooseVersion ("3.0" ):
3883- pdf = pd .DataFrame (range (1000 ))
3884+ pdf = pd .DataFrame ({ "x" : range (1000 )} )
38843885 kdf = ks .from_pandas (pdf )
38853886
3886- self .assert_eq (pdf .tail (), kdf .tail (), almost = True )
3887- self .assert_eq (pdf .tail (10 ), kdf .tail (10 ), almost = True )
3888- self .assert_eq (pdf .tail (- 990 ), kdf .tail (- 990 ), almost = True )
3889- self .assert_eq (pdf .tail (0 ), kdf .tail (0 ), almost = True )
3890- self .assert_eq (pdf .tail (- 1001 ), kdf .tail (- 1001 ), almost = True )
3891- self .assert_eq (pdf .tail (1001 ), kdf .tail (1001 ), almost = True )
3887+ self .assert_eq (pdf .tail (), kdf .tail ())
3888+ self .assert_eq (pdf .tail (10 ), kdf .tail (10 ))
3889+ self .assert_eq (pdf .tail (- 990 ), kdf .tail (- 990 ))
3890+ self .assert_eq (pdf .tail (0 ), kdf .tail (0 ))
3891+ self .assert_eq (pdf .tail (- 1001 ), kdf .tail (- 1001 ))
3892+ self .assert_eq (pdf .tail (1001 ), kdf .tail (1001 ))
38923893 with self .assertRaisesRegex (TypeError , "bad operand type for unary -: 'str'" ):
38933894 kdf .tail ("10" )
38943895
0 commit comments