Skip to content

Commit ff6654c

Browse files
committed
finalize
1 parent 8b5e7ea commit ff6654c

File tree

3 files changed

+34
-24
lines changed

3 files changed

+34
-24
lines changed

python/pyarrow/table.pxi

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,9 +1155,9 @@ cdef class Table(_PandasConvertible):
11551155
11561156
Parameters
11571157
----------
1158-
arrays: list of pyarrow.Array or pyarrow.Column
1158+
arrays : list of pyarrow.Array or pyarrow.Column
11591159
Equal-length arrays that should form the table.
1160-
names: list of str, optional
1160+
names : list of str, optional
11611161
Names for the table columns. If Columns passed, will be
11621162
inferred. If Arrays passed, this argument is required
11631163
schema : Schema, default None
@@ -1224,7 +1224,7 @@ cdef class Table(_PandasConvertible):
12241224
12251225
Parameters
12261226
----------
1227-
batches: sequence or iterator of RecordBatch
1227+
batches : sequence or iterator of RecordBatch
12281228
Sequence of RecordBatch to be converted, all schemas must be equal
12291229
schema : Schema, default None
12301230
If not passed, will be inferred from the first RecordBatch

python/pyarrow/tests/strategies.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,16 @@ def complex_types(inner_strategy=primitive_types):
111111
return list_types(inner_strategy) | struct_types(inner_strategy)
112112

113113

114-
def nested_list_types(item_strategy=primitive_types):
115-
return st.recursive(item_strategy, list_types)
114+
def nested_list_types(item_strategy=primitive_types, max_leaves=3):
115+
return st.recursive(item_strategy, list_types, max_leaves=max_leaves)
116116

117117

118-
def nested_struct_types(item_strategy=primitive_types):
119-
return st.recursive(item_strategy, struct_types)
118+
def nested_struct_types(item_strategy=primitive_types, max_leaves=3):
119+
return st.recursive(item_strategy, struct_types, max_leaves=max_leaves)
120120

121121

122-
def nested_complex_types(inner_strategy=primitive_types):
123-
return st.recursive(inner_strategy, complex_types)
122+
def nested_complex_types(inner_strategy=primitive_types, max_leaves=3):
123+
return st.recursive(inner_strategy, complex_types, max_leaves=max_leaves)
124124

125125

126126
def schemas(type_strategy=primitive_types, max_fields=None):
@@ -156,14 +156,13 @@ def arrays(draw, type, size=None):
156156
shape = (size,)
157157

158158
if pa.types.is_list(type):
159-
# TODO(kszucs) limit the depth
160159
offsets = draw(npst.arrays(np.uint8(), shape=shape)).cumsum() // 20
161160
offsets = np.insert(offsets, 0, 0, axis=0) # prepend with zero
162161
values = draw(arrays(type.value_type, size=int(offsets.sum())))
163162
return pa.ListArray.from_arrays(offsets, values)
164163

165164
if pa.types.is_struct(type):
166-
h.assume(len(type) > 0) # TODO(kszucs): create issue -> pa.struct([])
165+
h.assume(len(type) > 0)
167166
names, child_arrays = [], []
168167
for field in type:
169168
names.append(field.name)
@@ -190,12 +189,11 @@ def arrays(draw, type, size=None):
190189
value = st.binary()
191190
elif pa.types.is_string(type):
192191
value = st.text()
193-
# elif pa.types.is_decimal(type):
194-
# # TODO(kszucs): properly limit the precision
195-
# value = st.decimals(places=type.scale, allow_infinity=False)
196-
# type = None # We let arrow infer it from the values
192+
elif pa.types.is_decimal(type):
193+
# TODO(kszucs): properly limit the precision
194+
# value = st.decimals(places=type.scale, allow_infinity=False)
195+
h.reject()
197196
else:
198-
h.assume(not pa.types.is_decimal(type))
199197
raise NotImplementedError(type)
200198

201199
values = st.lists(value, min_size=size, max_size=size)
@@ -234,10 +232,27 @@ def record_batches(draw, type, rows=None, max_fields=None):
234232

235233
schema = draw(schemas(type, max_fields=max_fields))
236234
children = [draw(arrays(field.type, size=rows)) for field in schema]
235+
# TODO(kszucs): the names and schema arguments are not consistent with
236+
# Table.from_arrays' arguments
237237
return pa.RecordBatch.from_arrays(children, names=schema)
238238

239239

240+
@st.composite
241+
def tables(draw, type, rows=None, max_fields=None):
242+
if isinstance(rows, st.SearchStrategy):
243+
rows = draw(rows)
244+
elif rows is None:
245+
rows = draw(_default_array_sizes)
246+
elif not isinstance(rows, int):
247+
raise TypeError('Rows must be an integer')
248+
249+
schema = draw(schemas(type, max_fields=max_fields))
250+
children = [draw(arrays(field.type, size=rows)) for field in schema]
251+
return pa.Table.from_arrays(children, schema=schema)
252+
253+
240254
all_arrays = arrays(all_types)
241255
all_chunked_arrays = chunked_arrays(all_types)
242256
all_columns = columns(all_types)
243257
all_record_batches = record_batches(all_types)
258+
all_tables = tables(all_types)

python/pyarrow/tests/test_strategies.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
# under the License.
1717

1818
import hypothesis as h
19-
import hypothesis.strategies as st
2019

2120
import pyarrow as pa
2221
import pyarrow.tests.strategies as past
@@ -57,10 +56,6 @@ def test_record_batches(record_bath):
5756
assert isinstance(record_bath, pa.lib.RecordBatch)
5857

5958

60-
############################################################
61-
62-
63-
@h.given(st.text(), past.all_arrays | past.all_chunked_arrays)
64-
def test_column_factory(name, arr):
65-
column = pa.column(name, arr)
66-
assert isinstance(column, pa.Column)
59+
@h.given(past.all_tables)
60+
def test_tables(table):
61+
assert isinstance(table, pa.lib.Table)

0 commit comments

Comments
 (0)