Skip to content

Commit 4679548

Browse files
committed
gh-107137: Add _PyTupleBuilder API to the internal C API
Add the _PyTupleBuilder structure and its functions:

* _PyTupleBuilder_Init()
* _PyTupleBuilder_Alloc()
* _PyTupleBuilder_Append()
* _PyTupleBuilder_AppendUnsafe()
* _PyTupleBuilder_Finish()
* _PyTupleBuilder_Dealloc()

The builder tracks the size of the tuple and resizes it in _PyTupleBuilder_Finish() if needed. It does not allocate a tuple when the requested size is zero. _PyTupleBuilder_Append() over-allocates the tuple by 25% to reduce the number of _PyTuple_Resize() calls.
1 parent 0ae4870 commit 4679548

File tree

4 files changed

+169
-88
lines changed

4 files changed

+169
-88
lines changed

Include/internal/pycore_tuple.h

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,113 @@ typedef struct {
7373
PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */
7474
} _PyTupleIterObject;
7575

76+
77+
// --- _PyTupleBuilder API ---------------------------------------------------
78+
79+
typedef struct _PyTupleBuilder {
80+
PyObject *tuple;
81+
Py_ssize_t size;
82+
Py_ssize_t allocated;
83+
} _PyTupleBuilder;
84+
85+
static inline int
86+
_PyTupleBuilder_Alloc(_PyTupleBuilder *builder, size_t size)
87+
{
88+
if (size > (size_t)PY_SSIZE_T_MAX) {
89+
/* Check for overflow */
90+
PyErr_NoMemory();
91+
return -1;
92+
}
93+
if (size <= (size_t)builder->allocated) {
94+
return 0;
95+
}
96+
Py_ssize_t ssize = (Py_ssize_t)size;
97+
98+
if (builder->tuple != NULL) {
99+
if (_PyTuple_Resize(&builder->tuple, ssize) < 0) {
100+
return -1;
101+
}
102+
}
103+
else {
104+
builder->tuple = PyTuple_New(ssize);
105+
if (builder->tuple == NULL) {
106+
return -1;
107+
}
108+
}
109+
builder->allocated = ssize;
110+
return 0;
111+
}
112+
113+
static inline int
114+
_PyTupleBuilder_Init(_PyTupleBuilder *builder, Py_ssize_t size)
115+
{
116+
memset(builder, 0, sizeof(*builder));
117+
118+
int res;
119+
if (size > 0) {
120+
res = _PyTupleBuilder_Alloc(builder, (size_t)size);
121+
}
122+
else {
123+
res = 0;
124+
}
125+
return res;
126+
}
127+
128+
// The tuple builder must have already enough allocated items to store item.
129+
static inline void
130+
_PyTupleBuilder_AppendUnsafe(_PyTupleBuilder *builder, PyObject *item)
131+
{
132+
assert(builder->size < builder->allocated);
133+
PyTuple_SET_ITEM(builder->tuple, builder->size, item);
134+
builder->size++;
135+
}
136+
137+
static inline int
138+
_PyTupleBuilder_Append(_PyTupleBuilder *builder, PyObject *item)
139+
{
140+
if (builder->size == PY_SSIZE_T_MAX) {
141+
// prevent integer overflow
142+
PyErr_NoMemory();
143+
return -1;
144+
}
145+
if (builder->size >= builder->allocated) {
146+
size_t allocated = (size_t)builder->size;
147+
allocated += (allocated >> 2); // Over-allocate by 25%
148+
if (_PyTupleBuilder_Alloc(builder, allocated) < 0) {
149+
return -1;
150+
}
151+
}
152+
_PyTupleBuilder_AppendUnsafe(builder, item);
153+
return 0;
154+
}
155+
156+
static inline void
157+
_PyTupleBuilder_Dealloc(_PyTupleBuilder *builder)
158+
{
159+
Py_CLEAR(builder->tuple);
160+
}
161+
162+
static inline PyObject*
163+
_PyTupleBuilder_Finish(_PyTupleBuilder *builder)
164+
{
165+
if (builder->tuple == NULL) {
166+
assert(builder->size == 0);
167+
_PyTupleBuilder_Dealloc(builder);
168+
// return the empty tuple singleton
169+
return PyTuple_New(0);
170+
}
171+
172+
if (_PyTuple_Resize(&builder->tuple, builder->size) < 0) {
173+
_PyTupleBuilder_Dealloc(builder);
174+
return NULL;
175+
}
176+
177+
PyObject *result = builder->tuple;
178+
builder->tuple = NULL;
179+
return result;
180+
}
181+
182+
76183
#ifdef __cplusplus
77184
}
78185
#endif

Modules/itertoolsmodule.c

Lines changed: 26 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "pycore_moduleobject.h" // _PyModule_GetState()
55
#include "pycore_typeobject.h" // _PyType_GetModuleState()
66
#include "pycore_object.h" // _PyObject_GC_TRACK()
7-
#include "pycore_tuple.h" // _PyTuple_ITEMS()
7+
#include "pycore_tuple.h" // _PyTupleBuilder
88
#include "structmember.h" // PyMemberDef
99
#include <stddef.h> // offsetof()
1010

@@ -193,47 +193,42 @@ batched_traverse(batchedobject *bo, visitproc visit, void *arg)
193193
static PyObject *
194194
batched_next(batchedobject *bo)
195195
{
196-
Py_ssize_t i;
197-
Py_ssize_t n = bo->batch_size;
198196
PyObject *it = bo->it;
199-
PyObject *item;
200-
PyObject *result;
201-
202197
if (it == NULL) {
203198
return NULL;
204199
}
205-
result = PyTuple_New(n);
206-
if (result == NULL) {
200+
201+
_PyTupleBuilder builder;
202+
Py_ssize_t n = bo->batch_size;
203+
if (_PyTupleBuilder_Init(&builder, n) < 0) {
207204
return NULL;
208205
}
206+
209207
iternextfunc iternext = *Py_TYPE(it)->tp_iternext;
210-
PyObject **items = _PyTuple_ITEMS(result);
211-
for (i=0 ; i < n ; i++) {
212-
item = iternext(it);
208+
for (Py_ssize_t i=0 ; i < n; i++) {
209+
PyObject *item = iternext(it);
213210
if (item == NULL) {
214-
goto null_item;
211+
if (PyErr_Occurred()) {
212+
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
213+
/* Input raised an exception other than StopIteration */
214+
goto error;
215+
}
216+
PyErr_Clear();
217+
// StopIteration was raised
218+
}
219+
if (i == 0) {
220+
goto error;
221+
}
222+
break;
215223
}
216-
items[i] = item;
224+
_PyTupleBuilder_AppendUnsafe(&builder, item);
217225
}
218-
return result;
226+
return _PyTupleBuilder_Finish(&builder);
219227

220-
null_item:
221-
if (PyErr_Occurred()) {
222-
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
223-
/* Input raised an exception other than StopIteration */
224-
Py_CLEAR(bo->it);
225-
Py_DECREF(result);
226-
return NULL;
227-
}
228-
PyErr_Clear();
229-
}
230-
if (i == 0) {
231-
Py_CLEAR(bo->it);
232-
Py_DECREF(result);
233-
return NULL;
234-
}
235-
_PyTuple_Resize(&result, i);
236-
return result;
228+
error:
229+
_PyTupleBuilder_Dealloc(&builder);
230+
Py_CLEAR(bo->it);
231+
return NULL;
237232
}
238233

239234
static PyType_Slot batched_slots[] = {

Objects/abstract.c

Lines changed: 26 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2074,11 +2074,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2)
20742074
PyObject *
20752075
PySequence_Tuple(PyObject *v)
20762076
{
2077-
PyObject *it; /* iter(v) */
2078-
Py_ssize_t n; /* guess for result tuple size */
2079-
PyObject *result = NULL;
2080-
Py_ssize_t j;
2081-
20822077
if (v == NULL) {
20832078
return null_error();
20842079
}
@@ -2091,66 +2086,53 @@ PySequence_Tuple(PyObject *v)
20912086
a copy, so there's no need for exactness below. */
20922087
return Py_NewRef(v);
20932088
}
2094-
if (PyList_CheckExact(v))
2089+
if (PyList_CheckExact(v)) {
20952090
return PyList_AsTuple(v);
2091+
}
20962092

2097-
/* Get iterator. */
2098-
it = PyObject_GetIter(v);
2099-
if (it == NULL)
2093+
_PyTupleBuilder builder;
2094+
if (_PyTupleBuilder_Init(&builder, 0) < 0) {
21002095
return NULL;
2096+
}
2097+
2098+
/* Get iterator. */
2099+
PyObject *it = PyObject_GetIter(v); // iter(v)
2100+
if (it == NULL) {
2101+
goto Fail;
2102+
}
21012103

21022104
/* Guess result size and allocate space. */
2103-
n = PyObject_LengthHint(v, 10);
2104-
if (n == -1)
2105+
Py_ssize_t n = PyObject_LengthHint(v, 10); // Guess for result tuple size
2106+
if (n == -1) {
21052107
goto Fail;
2106-
result = PyTuple_New(n);
2107-
if (result == NULL)
2108+
}
2109+
if (_PyTupleBuilder_Alloc(&builder, n) < 0) {
21082110
goto Fail;
2111+
}
21092112

21102113
/* Fill the tuple. */
2114+
Py_ssize_t j;
21112115
for (j = 0; ; ++j) {
21122116
PyObject *item = PyIter_Next(it);
21132117
if (item == NULL) {
2114-
if (PyErr_Occurred())
2118+
if (PyErr_Occurred()) {
21152119
goto Fail;
2120+
}
21162121
break;
21172122
}
2118-
if (j >= n) {
2119-
size_t newn = (size_t)n;
2120-
/* The over-allocation strategy can grow a bit faster
2121-
than for lists because unlike lists the
2122-
over-allocation isn't permanent -- we reclaim
2123-
the excess before the end of this routine.
2124-
So, grow by ten and then add 25%.
2125-
*/
2126-
newn += 10u;
2127-
newn += newn >> 2;
2128-
if (newn > PY_SSIZE_T_MAX) {
2129-
/* Check for overflow */
2130-
PyErr_NoMemory();
2131-
Py_DECREF(item);
2132-
goto Fail;
2133-
}
2134-
n = (Py_ssize_t)newn;
2135-
if (_PyTuple_Resize(&result, n) != 0) {
2136-
Py_DECREF(item);
2137-
goto Fail;
2138-
}
2123+
2124+
if (_PyTupleBuilder_Append(&builder, item) < 0) {
2125+
Py_DECREF(item);
2126+
goto Fail;
21392127
}
2140-
PyTuple_SET_ITEM(result, j, item);
21412128
}
21422129

2143-
/* Cut tuple back if guess was too large. */
2144-
if (j < n &&
2145-
_PyTuple_Resize(&result, j) != 0)
2146-
goto Fail;
2147-
21482130
Py_DECREF(it);
2149-
return result;
2131+
return _PyTupleBuilder_Finish(&builder);
21502132

21512133
Fail:
2152-
Py_XDECREF(result);
2153-
Py_DECREF(it);
2134+
_PyTupleBuilder_Dealloc(&builder);
2135+
Py_XDECREF(it);
21542136
return NULL;
21552137
}
21562138

Objects/structseq.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -390,11 +390,9 @@ count_members(PyStructSequence_Desc *desc, Py_ssize_t *n_unnamed_members) {
390390
static int
391391
initialize_structseq_dict(PyStructSequence_Desc *desc, PyObject* dict,
392392
Py_ssize_t n_members, Py_ssize_t n_unnamed_members) {
393-
PyObject *v;
394-
395393
#define SET_DICT_FROM_SIZE(key, value) \
396394
do { \
397-
v = PyLong_FromSsize_t(value); \
395+
PyObject *v = PyLong_FromSsize_t(value); \
398396
if (v == NULL) { \
399397
return -1; \
400398
} \
@@ -410,37 +408,36 @@ initialize_structseq_dict(PyStructSequence_Desc *desc, PyObject* dict,
410408
SET_DICT_FROM_SIZE(unnamed_fields_key, n_unnamed_members);
411409

412410
// Prepare and set __match_args__
413-
Py_ssize_t i, k;
414-
PyObject* keys = PyTuple_New(desc->n_in_sequence);
415-
if (keys == NULL) {
411+
_PyTupleBuilder keys_builder;
412+
if (_PyTupleBuilder_Init(&keys_builder, desc->n_in_sequence) < 0) {
416413
return -1;
417414
}
418415

419-
for (i = k = 0; i < desc->n_in_sequence; ++i) {
416+
for (Py_ssize_t i = 0; i < desc->n_in_sequence; ++i) {
420417
if (desc->fields[i].name == PyStructSequence_UnnamedField) {
421418
continue;
422419
}
423420
PyObject* new_member = PyUnicode_FromString(desc->fields[i].name);
424421
if (new_member == NULL) {
425422
goto error;
426423
}
427-
PyTuple_SET_ITEM(keys, k, new_member);
428-
k++;
424+
_PyTupleBuilder_AppendUnsafe(&keys_builder, new_member);
429425
}
430426

431-
if (_PyTuple_Resize(&keys, k) == -1) {
427+
PyObject *keys = _PyTupleBuilder_Finish(&keys_builder);
428+
if (keys == NULL) {
432429
goto error;
433430
}
434-
435431
if (PyDict_SetItemString(dict, match_args_key, keys) < 0) {
432+
Py_DECREF(keys);
436433
goto error;
437434
}
438-
439435
Py_DECREF(keys);
436+
440437
return 0;
441438

442439
error:
443-
Py_DECREF(keys);
440+
_PyTupleBuilder_Dealloc(&keys_builder);
444441
return -1;
445442
}
446443

0 commit comments

Comments
 (0)