Skip to content

Commit d25ba98

Browse files
committed
pythonGH-120754: Remove isatty call during regular read
For POSIX, TTYs are never regular files, so if the interpreter knows the file is regular it doesn't need to do an additional system call to check if the file is a TTY. The `open()` Python builtin requires a `stat` call at present in order to ensure the file being opened isn't a directory. That result includes the file mode which tells us if it is a regular file. There are a number of attributes from the stat which are stashed one off currently, move to stashing the whole object rather than just individual members. The stat object is reasonably large and currently the `stat_result.st_size` member cannot be modified from Python, which is needed by the `_pyio` implementation, so make the whole stat object optional. In the `_io` implementation this makes handling a stat failure simpler. At present there is no explicit user call to clear it, but if one is needed (ex. a program which has a lot of open FileIO objects and the memory becomes a problem) it would be straightforward to add. Ideally would be able to automatically clear (the values are generally used during I/O object initialization and not after. After a `write` they are no longer useful in current cases). It is fairly common pattern to scan a directory, look at the `stat` results (ex. is this file changed), and then open/read the file. In this PR I didn't update open's API to allow passing in a stat result to use, but that could be beneficial for some cases (ex. `importlib`). With this change on my Linux machine reading a small plain text file is down to 6 system calls. ```python openat(AT_FDCWD, "read_one.py", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=87, ...}) = 0 lseek(3, 0, SEEK_CUR) = 0 read(3, "from pathlib import Path\n\npath ="..., 88) = 87 read(3, "", 1) = 0 close(3) = 0 ```
1 parent f621618 commit d25ba98

File tree

3 files changed

+78
-39
lines changed

3 files changed

+78
-39
lines changed

Lib/_pyio.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ def text_encoding(encoding, stacklevel=2):
6464
return encoding
6565

6666

67+
def _shortcut_isatty(raw):
68+
"""Regular files are never TTYs. In cases where we know we have a stat
69+
result, use it to shortcut / skip a call to isatty."""
70+
if raw._stat_atopen and stat.S_ISREG(raw._stat_atopen.st_mode):
71+
return False
72+
73+
return raw.isatty()
74+
75+
6776
# Wrapper for builtins.open
6877
#
6978
# Trick so that open() won't become a bound method when stored
@@ -238,18 +247,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
238247
result = raw
239248
try:
240249
line_buffering = False
241-
if buffering == 1 or buffering < 0 and raw.isatty():
250+
251+
if buffering == 1 or buffering < 0 and _shortcut_isatty(raw):
242252
buffering = -1
243253
line_buffering = True
244254
if buffering < 0:
245-
buffering = DEFAULT_BUFFER_SIZE
246-
try:
247-
bs = os.fstat(raw.fileno()).st_blksize
248-
except (OSError, AttributeError):
249-
pass
250-
else:
251-
if bs > 1:
252-
buffering = bs
255+
buffering = raw._blksize
253256
if buffering < 0:
254257
raise ValueError("invalid buffering size")
255258
if buffering == 0:
@@ -1565,19 +1568,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
15651568
os.set_inheritable(fd, False)
15661569

15671570
self._closefd = closefd
1568-
fdfstat = os.fstat(fd)
1571+
self._stat_atopen = os.fstat(fd)
15691572
try:
1570-
if stat.S_ISDIR(fdfstat.st_mode):
1573+
if stat.S_ISDIR(self._stat_atopen.st_mode):
15711574
raise IsADirectoryError(errno.EISDIR,
15721575
os.strerror(errno.EISDIR), file)
15731576
except AttributeError:
15741577
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
15751578
# don't exist.
15761579
pass
1577-
self._blksize = getattr(fdfstat, 'st_blksize', 0)
1578-
if self._blksize <= 1:
1579-
self._blksize = DEFAULT_BUFFER_SIZE
1580-
self._estimated_size = fdfstat.st_size
15811580

15821581
if _setmode:
15831582
# don't translate newlines (\r\n <=> \n)
@@ -1623,6 +1622,13 @@ def __repr__(self):
16231622
return ('<%s name=%r mode=%r closefd=%r>' %
16241623
(class_name, name, self.mode, self._closefd))
16251624

1625+
@property
1626+
def _blksize(self):
1627+
if self._stat_atopen:
1628+
return getattr(self._stat_atopen, "st_blksize", DEFAULT_BUFFER_SIZE)
1629+
1630+
return DEFAULT_BUFFER_SIZE
1631+
16261632
def _checkReadable(self):
16271633
if not self._readable:
16281634
raise UnsupportedOperation('File not open for reading')
@@ -1655,16 +1661,16 @@ def readall(self):
16551661
"""
16561662
self._checkClosed()
16571663
self._checkReadable()
1658-
if self._estimated_size <= 0:
1664+
if not self._stat_atopen or self._stat_atopen.st_size <= 0:
16591665
bufsize = DEFAULT_BUFFER_SIZE
16601666
else:
1661-
bufsize = self._estimated_size + 1
1667+
bufsize = self._stat_atopen.st_size + 1
16621668

1663-
if self._estimated_size > 65536:
1669+
if self._stat_atopen.st_size > 65536:
16641670
try:
16651671
pos = os.lseek(self._fd, 0, SEEK_CUR)
1666-
if self._estimated_size >= pos:
1667-
bufsize = self._estimated_size - pos + 1
1672+
if self._stat_atopen.st_size >= pos:
1673+
bufsize = self._stat_atopen.st_size - pos + 1
16681674
except OSError:
16691675
pass
16701676

@@ -1742,7 +1748,7 @@ def truncate(self, size=None):
17421748
if size is None:
17431749
size = self.tell()
17441750
os.ftruncate(self._fd, size)
1745-
self._estimated_size = size
1751+
self._stat_atopen = None
17461752
return size
17471753

17481754
def close(self):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Skip the ``isatty`` system call on just-opened regular files. This provides a
2+
slight performance improvement when reading whole files.

Modules/_io/fileio.c

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ typedef struct {
7474
signed int seekable : 2; /* -1 means unknown */
7575
unsigned int closefd : 1;
7676
char finalizing;
77-
unsigned int blksize;
78-
Py_off_t estimated_size;
77+
struct _Py_stat_struct *stat_atopen;
7978
PyObject *weakreflist;
8079
PyObject *dict;
8180
} fileio;
@@ -199,8 +198,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
199198
self->writable = 0;
200199
self->appending = 0;
201200
self->seekable = -1;
202-
self->blksize = 0;
203-
self->estimated_size = -1;
201+
self->stat_atopen = NULL;
204202
self->closefd = 1;
205203
self->weakreflist = NULL;
206204
}
@@ -256,7 +254,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
256254
#elif !defined(MS_WINDOWS)
257255
int *atomic_flag_works = NULL;
258256
#endif
259-
struct _Py_stat_struct fdfstat;
260257
int fstat_result;
261258
int async_err = 0;
262259

@@ -454,9 +451,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
454451
#endif
455452
}
456453

457-
self->blksize = DEFAULT_BUFFER_SIZE;
454+
self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1);
455+
if (self->stat_atopen == NULL) {
456+
PyErr_Format(PyExc_MemoryError, "Unable to allocate space for stat result");
457+
goto error;
458+
}
458459
Py_BEGIN_ALLOW_THREADS
459-
fstat_result = _Py_fstat_noraise(self->fd, &fdfstat);
460+
fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen);
460461
Py_END_ALLOW_THREADS
461462
if (fstat_result < 0) {
462463
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +472,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
471472
#endif
472473
goto error;
473474
}
475+
476+
PyMem_Free(self->stat_atopen);
477+
self->stat_atopen = NULL;
474478
}
475479
else {
476480
#if defined(S_ISDIR) && defined(EISDIR)
477481
/* On Unix, open will succeed for directories.
478482
In Python, there should be no file objects referring to
479483
directories, so we need a check. */
480-
if (S_ISDIR(fdfstat.st_mode)) {
484+
if (S_ISDIR(self->stat_atopen->st_mode)) {
481485
errno = EISDIR;
482486
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj);
483487
goto error;
484488
}
485489
#endif /* defined(S_ISDIR) */
486-
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
487-
if (fdfstat.st_blksize > 1)
488-
self->blksize = fdfstat.st_blksize;
489-
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490-
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
491-
self->estimated_size = (Py_off_t)fdfstat.st_size;
492-
}
493490
}
494491

495492
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -521,6 +518,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
521518
internal_close(self);
522519
_PyErr_ChainExceptions1(exc);
523520
}
521+
if (self->stat_atopen != NULL) {
522+
PyMem_Free(self->stat_atopen);
523+
self->stat_atopen = NULL;
524+
}
524525

525526
done:
526527
#ifdef MS_WINDOWS
@@ -553,6 +554,10 @@ fileio_dealloc(fileio *self)
553554
if (_PyIOBase_finalize((PyObject *) self) < 0)
554555
return;
555556
_PyObject_GC_UNTRACK(self);
557+
if (self->stat_atopen != NULL) {
558+
PyMem_Free(self->stat_atopen);
559+
self->stat_atopen = NULL;
560+
}
556561
if (self->weakreflist != NULL)
557562
PyObject_ClearWeakRefs((PyObject *) self);
558563
(void)fileio_clear(self);
@@ -725,7 +730,13 @@ _io_FileIO_readall_impl(fileio *self)
725730
return err_closed();
726731
}
727732

728-
end = self->estimated_size;
733+
if (self->stat_atopen != NULL) {
734+
end = (Py_off_t)Py_MIN(self->stat_atopen->st_size, PY_SSIZE_T_MAX);
735+
}
736+
else {
737+
end = -1;
738+
}
739+
729740
if (end <= 0) {
730741
/* Use a default size and resize as needed. */
731742
bufsize = SMALLCHUNK;
@@ -1098,7 +1109,10 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
10981109
estimate, that it is much larger than the actual size can result in a
10991110
significant over allocation and sometimes a MemoryError / running out of
11001111
memory. */
1101-
self->estimated_size = pos;
1112+
if (self->stat_atopen != NULL) {
1113+
PyMem_Free(self->stat_atopen);
1114+
self->stat_atopen = NULL;
1115+
}
11021116

11031117
return posobj;
11041118
}
@@ -1179,6 +1193,11 @@ _io_FileIO_isatty_impl(fileio *self)
11791193

11801194
if (self->fd < 0)
11811195
return err_closed();
1196+
1197+
if (self->stat_atopen != NULL && S_ISREG(self->stat_atopen->st_mode)) {
1198+
return Py_False;
1199+
}
1200+
11821201
Py_BEGIN_ALLOW_THREADS
11831202
_Py_BEGIN_SUPPRESS_IPH
11841203
res = isatty(self->fd);
@@ -1229,16 +1248,28 @@ get_mode(fileio *self, void *closure)
12291248
return PyUnicode_FromString(mode_string(self));
12301249
}
12311250

1251+
static PyObject *
1252+
get_blksize(fileio *self, void *closure)
1253+
{
1254+
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1255+
if (self->stat_atopen != NULL && self->stat_atopen->st_blksize > 1) {
1256+
return PyLong_FromLong(self->stat_atopen->st_blksize);
1257+
}
1258+
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
1259+
return PyLong_FromLong(DEFAULT_BUFFER_SIZE);
1260+
}
1261+
1262+
12321263
static PyGetSetDef fileio_getsetlist[] = {
12331264
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
12341265
{"closefd", (getter)get_closefd, NULL,
12351266
"True if the file descriptor will be closed by close()."},
12361267
{"mode", (getter)get_mode, NULL, "String giving the file mode"},
1268+
{"_blksize", (getter)get_blksize, NULL, "blksize to use for io"},
12371269
{NULL},
12381270
};
12391271

12401272
static PyMemberDef fileio_members[] = {
1241-
{"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0},
12421273
{"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0},
12431274
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY},
12441275
{"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},

0 commit comments

Comments
 (0)