Skip to content

Commit 0a1ff24

Browse files
authored
bpo-17852: Maintain a list of BufferedWriter objects. Flush them on exit. (#3372)
* Maintain a list of BufferedWriter objects. Flush them on exit. In Python 3, the buffer and the underlying file object are separate and so the order in which objects are finalized matters. This is unlike Python 2 where the file and buffer were a single object and finalization was done for both at the same time. In Python 3, if the file is finalized and closed before the buffer then the data in the buffer is lost. This change adds a doubly linked list of open file buffers. An atexit hook ensures they are flushed before proceeding with interpreter shutdown. This addition does not remove the need to properly close files as there are other reasons why buffered data could get lost during finalization. Initial patch by Armin Rigo. * Use weakref.WeakSet instead of WeakKeyDictionary. * Simplify buffered doubly-linked list types. * In _flush_all_writers(), suppress errors from flush(). * Remove NEWS entry, use blurb. * Take more care when flushing file buffers from atexit. The previous implementation was not careful enough to avoid causing issues in multi-threaded cases. Check for buf->ok and buf->finalizing before actually doing the flush. Also, increase the refcnt to ensure the object does not disappear.
1 parent da9b4cf commit 0a1ff24

File tree

5 files changed

+80
-1
lines changed

5 files changed

+80
-1
lines changed

Lib/_pyio.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,7 @@ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
11821182
self.buffer_size = buffer_size
11831183
self._write_buf = bytearray()
11841184
self._write_lock = Lock()
1185+
_register_writer(self)
11851186

11861187
def writable(self):
11871188
return self.raw.writable()
@@ -2571,3 +2572,26 @@ def encoding(self):
25712572
def detach(self):
25722573
# This doesn't make sense on StringIO.
25732574
self._unsupported("detach")
2575+
2576+
2577+
# ____________________________________________________________
2578+
2579+
import atexit, weakref
2580+
2581+
_all_writers = weakref.WeakSet()
2582+
2583+
def _register_writer(w):
    """Remember the buffered writer *w* without keeping it alive.

    The writer is added to the module-level ``_all_writers`` weak set, so
    registration by itself never prevents *w* from being garbage collected.
    """
    _all_writers.add(w)
2586+
2587+
def _flush_all_writers():
    """Flush every registered buffered writer on a best-effort basis.

    Ensures all buffered writers are flushed before proceeding with normal
    shutdown.  Otherwise, if the underlying file object gets finalized
    before the buffered writer wrapping it, any buffered data is lost.

    Errors raised by an individual ``flush()`` are suppressed so that one
    broken or already-closed writer cannot prevent the others from being
    flushed.
    """
    # Iterate over a snapshot rather than the live WeakSet: a flush() that
    # ends up creating (and therefore registering) another writer would
    # otherwise resize the set mid-iteration and abort the whole loop with
    # a RuntimeError raised outside the try block.  The snapshot also holds
    # strong references, so no writer can disappear while we are looping.
    for w in list(_all_writers):
        try:
            w.flush()
        except Exception:
            # Deliberately ignored: flushing here is best effort only.
            # KeyboardInterrupt/SystemExit are not errors from flush() and
            # are allowed to propagate.
            pass
2597+
atexit.register(_flush_all_writers)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Maintain a list of open buffered files and flush them before exiting the
2+
interpreter. Based on a patch from Armin Rigo.

Modules/_io/_iomodule.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,8 @@ PyInit__io(void)
766766
!(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0)))
767767
goto fail;
768768

769+
_Py_PyAtExit(_PyIO_atexit_flush);
770+
769771
state->initialized = 1;
770772

771773
return m;

Modules/_io/_iomodule.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,5 @@ extern PyObject *_PyIO_empty_str;
183183
extern PyObject *_PyIO_empty_bytes;
184184

185185
extern PyTypeObject _PyBytesIOBuffer_Type;
186+
187+
/* Flush the buffered objects that are still pending at interpreter exit.
   PyInit__io() installs this function as an exit hook via _Py_PyAtExit(). */
extern void _PyIO_atexit_flush(void);

Modules/_io/bufferedio.c

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ bufferediobase_write(PyObject *self, PyObject *args)
197197
}
198198

199199

200-
typedef struct {
200+
typedef struct _buffered {
201201
PyObject_HEAD
202202

203203
PyObject *raw;
@@ -239,8 +239,18 @@ typedef struct {
239239

240240
PyObject *dict;
241241
PyObject *weakreflist;
242+
243+
/* a doubly-linked chained list of "buffered" objects that need to
244+
be flushed when the process exits */
245+
struct _buffered *next, *prev;
242246
} buffered;
243247

248+
/* the actual list of buffered objects */
249+
static buffered buffer_list_end = {
250+
.next = &buffer_list_end,
251+
.prev = &buffer_list_end
252+
};
253+
244254
/*
245255
Implementation notes:
246256
@@ -378,10 +388,21 @@ _enter_buffered_busy(buffered *self)
378388
(self->buffer_size * (size / self->buffer_size)))
379389

380390

391+
static void
392+
remove_from_linked_list(buffered *self)
393+
{
394+
self->next->prev = self->prev;
395+
self->prev->next = self->next;
396+
self->prev = NULL;
397+
self->next = NULL;
398+
}
399+
381400
static void
382401
buffered_dealloc(buffered *self)
383402
{
384403
self->finalizing = 1;
404+
if (self->next != NULL)
405+
remove_from_linked_list(self);
385406
if (_PyIOBase_finalize((PyObject *) self) < 0)
386407
return;
387408
_PyObject_GC_UNTRACK(self);
@@ -1805,10 +1826,38 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw,
18051826
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type &&
18061827
Py_TYPE(raw) == &PyFileIO_Type);
18071828

1829+
if (self->next == NULL) {
1830+
self->prev = &buffer_list_end;
1831+
self->next = buffer_list_end.next;
1832+
buffer_list_end.next->prev = self;
1833+
buffer_list_end.next = self;
1834+
}
1835+
18081836
self->ok = 1;
18091837
return 0;
18101838
}
18111839

1840+
/*
1841+
* Ensure all buffered writers are flushed before proceeding with
1842+
* normal shutdown. Otherwise, if the underlying file objects get
1843+
* finalized before the buffered writer wrapping it then any buffered
1844+
* data will be lost.
1845+
*/
1846+
void _PyIO_atexit_flush(void)
1847+
{
1848+
while (buffer_list_end.next != &buffer_list_end) {
1849+
buffered *buf = buffer_list_end.next;
1850+
remove_from_linked_list(buf);
1851+
if (buf->ok && !buf->finalizing) {
1852+
/* good state and not finalizing */
1853+
Py_INCREF(buf);
1854+
buffered_flush(buf, NULL);
1855+
Py_DECREF(buf);
1856+
PyErr_Clear();
1857+
}
1858+
}
1859+
}
1860+
18121861
static Py_ssize_t
18131862
_bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len)
18141863
{

0 commit comments

Comments
 (0)