26
26
from pandas .tests .extension import base
27
27
28
28
29
- def split_array (arr ):
30
- if arr .dtype .storage != "pyarrow" :
31
- pytest .skip ("only applicable for pyarrow chunked array n/a" )
32
-
33
- def _split_array (arr ):
34
- import pyarrow as pa
35
-
36
- arrow_array = arr ._pa_array
37
- split = len (arrow_array ) // 2
38
- arrow_array = pa .chunked_array (
39
- [* arrow_array [:split ].chunks , * arrow_array [split :].chunks ]
40
- )
41
- assert arrow_array .num_chunks == 2
42
- return type (arr )(arrow_array )
43
-
44
- return _split_array (arr )
29
+ def maybe_split_array (arr , chunked ):
30
+ if not chunked :
31
+ return arr
32
+ elif arr .dtype .storage != "pyarrow" :
33
+ return arr
34
+
35
+ pa = pytest .importorskip ("pyarrow" )
36
+
37
+ arrow_array = arr ._pa_array
38
+ split = len (arrow_array ) // 2
39
+ arrow_array = pa .chunked_array (
40
+ [* arrow_array [:split ].chunks , * arrow_array [split :].chunks ]
41
+ )
42
+ assert arrow_array .num_chunks == 2
43
+ return type (arr )(arrow_array )
45
44
46
45
47
46
@pytest .fixture (params = [True , False ])
@@ -61,34 +60,34 @@ def data(dtype, chunked):
61
60
strings = np .random .default_rng (2 ).choice (list (string .ascii_letters ), size = 100 )
62
61
63
62
arr = dtype .construct_array_type ()._from_sequence (strings )
64
- return split_array (arr ) if chunked else arr
63
+ return maybe_split_array (arr , chunked )
65
64
66
65
67
66
@pytest .fixture
68
67
def data_missing (dtype , chunked ):
69
68
"""Length 2 array with [NA, Valid]"""
70
69
arr = dtype .construct_array_type ()._from_sequence ([pd .NA , "A" ])
71
- return split_array (arr ) if chunked else arr
70
+ return maybe_split_array (arr , chunked )
72
71
73
72
74
73
@pytest .fixture
75
74
def data_for_sorting (dtype , chunked ):
76
75
arr = dtype .construct_array_type ()._from_sequence (["B" , "C" , "A" ])
77
- return split_array (arr ) if chunked else arr
76
+ return maybe_split_array (arr , chunked )
78
77
79
78
80
79
@pytest .fixture
81
80
def data_missing_for_sorting (dtype , chunked ):
82
81
arr = dtype .construct_array_type ()._from_sequence (["B" , pd .NA , "A" ])
83
- return split_array (arr ) if chunked else arr
82
+ return maybe_split_array (arr , chunked )
84
83
85
84
86
85
@pytest .fixture
87
86
def data_for_grouping (dtype , chunked ):
88
87
arr = dtype .construct_array_type ()._from_sequence (
89
88
["B" , "B" , pd .NA , pd .NA , "A" , "A" , "B" , "C" ]
90
89
)
91
- return split_array (arr ) if chunked else arr
90
+ return maybe_split_array (arr , chunked )
92
91
93
92
94
93
class TestDtype (base .BaseDtypeTests ):
0 commit comments