Skip to content

Commit c2e86c4

Browse files
committed
pythonGH-120754: Remove isatty call during regular read
For POSIX, TTYs are never regular files, so if the interpreter knows the file is regular it doesn't need to do an additional system call to check if the file is a tty. The `open()` API requires a `stat` call at present in order to make sure the file being opened isn't a directory. That result includes the file mode which tells us if it is a regular file. There's a number of attributes from the stat which are stashed one off currently, move to stashing the whole object rather than just individual members. The stat object is reasonably large and currently the `stat_result.st_size` member cannot be modified from Python, which is needed by the `_pyio` implementation, so make the whole stat object optional. In the `_io` implementation this makes handling a stat failure simpler. At present there is no explicit user call to clear it, but if one is needed (ex. a program which has a lot of open FileIO objects and the memory becomes a problem) it would be straightforward to add. Ideally would be able to automatically clear (the values are generally used during I/O object initialization and not after. After a `write` they are no longer useful in current cases). It is fairly common pattern to scan a directory, look at the `stat` results (ex. is this file changed), and then open/read the file. In this PR I didn't update open's API to allow passing in a stat result to use, but that could be beneficial for some cases (ex. `importlib`). With this change on my Linux machine reading a small plain text file is down to 6 system calls. ``` openat(AT_FDCWD, "read_one.py", O_RDONLY|O_CLOEXEC) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=87, ...}) = 0 lseek(3, 0, SEEK_CUR) = 0 read(3, "from pathlib import Path\n\npath ="..., 88) = 87 read(3, "", 1) = 0 close(3) = 0 ```
1 parent f621618 commit c2e86c4

File tree

2 files changed

+68
-38
lines changed

2 files changed

+68
-38
lines changed

Lib/_pyio.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
242242
buffering = -1
243243
line_buffering = True
244244
if buffering < 0:
245-
buffering = DEFAULT_BUFFER_SIZE
246-
try:
247-
bs = os.fstat(raw.fileno()).st_blksize
248-
except (OSError, AttributeError):
249-
pass
250-
else:
251-
if bs > 1:
252-
buffering = bs
245+
buffering = raw._blksize
253246
if buffering < 0:
254247
raise ValueError("invalid buffering size")
255248
if buffering == 0:
@@ -1565,19 +1558,15 @@ def __init__(self, file, mode='r', closefd=True, opener=None):
15651558
os.set_inheritable(fd, False)
15661559

15671560
self._closefd = closefd
1568-
fdfstat = os.fstat(fd)
1561+
self._stat_atopen = os.fstat(fd)
15691562
try:
1570-
if stat.S_ISDIR(fdfstat.st_mode):
1563+
if stat.S_ISDIR(self._stat_atopen.st_mode):
15711564
raise IsADirectoryError(errno.EISDIR,
15721565
os.strerror(errno.EISDIR), file)
15731566
except AttributeError:
15741567
# Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
15751568
# don't exist.
15761569
pass
1577-
self._blksize = getattr(fdfstat, 'st_blksize', 0)
1578-
if self._blksize <= 1:
1579-
self._blksize = DEFAULT_BUFFER_SIZE
1580-
self._estimated_size = fdfstat.st_size
15811570

15821571
if _setmode:
15831572
# don't translate newlines (\r\n <=> \n)
@@ -1623,6 +1612,13 @@ def __repr__(self):
16231612
return ('<%s name=%r mode=%r closefd=%r>' %
16241613
(class_name, name, self.mode, self._closefd))
16251614

1615+
@property
1616+
def _blksize(self):
1617+
if self._stat_atopen and self._stat_atopen.st_blksize > 0:
1618+
return self._stat_atopen.st_blksize
1619+
1620+
return DEFAULT_BUFFER_SIZE
1621+
16261622
def _checkReadable(self):
16271623
if not self._readable:
16281624
raise UnsupportedOperation('File not open for reading')
@@ -1655,16 +1651,16 @@ def readall(self):
16551651
"""
16561652
self._checkClosed()
16571653
self._checkReadable()
1658-
if self._estimated_size <= 0:
1654+
if not self._stat_atopen or self._stat_atopen.st_size <= 0:
16591655
bufsize = DEFAULT_BUFFER_SIZE
16601656
else:
1661-
bufsize = self._estimated_size + 1
1657+
bufsize = self._stat_atopen.st_size + 1
16621658

1663-
if self._estimated_size > 65536:
1659+
if self._stat_atopen.st_size > 65536:
16641660
try:
16651661
pos = os.lseek(self._fd, 0, SEEK_CUR)
1666-
if self._estimated_size >= pos:
1667-
bufsize = self._estimated_size - pos + 1
1662+
if self._stat_atopen.st_size >= pos:
1663+
bufsize = self._stat_atopen.st_size - pos + 1
16681664
except OSError:
16691665
pass
16701666

@@ -1742,7 +1738,7 @@ def truncate(self, size=None):
17421738
if size is None:
17431739
size = self.tell()
17441740
os.ftruncate(self._fd, size)
1745-
self._estimated_size = size
1741+
self._stat_atopen = None
17461742
return size
17471743

17481744
def close(self):
@@ -1788,6 +1784,10 @@ def fileno(self):
17881784
def isatty(self):
17891785
"""True if the file is connected to a TTY device."""
17901786
self._checkClosed()
1787+
# Regular files are never TTYs, save a syscall for common "read file off
1788+
# disk" case.
1789+
if self._stat_atopen and stat.S_ISREG(self._stat_atopen.st_mode):
1790+
return False
17911791
return os.isatty(self._fd)
17921792

17931793
@property

Modules/_io/fileio.c

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ typedef struct {
7474
signed int seekable : 2; /* -1 means unknown */
7575
unsigned int closefd : 1;
7676
char finalizing;
77-
unsigned int blksize;
78-
Py_off_t estimated_size;
77+
struct _Py_stat_struct *stat_atopen;
7978
PyObject *weakreflist;
8079
PyObject *dict;
8180
} fileio;
@@ -199,8 +198,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
199198
self->writable = 0;
200199
self->appending = 0;
201200
self->seekable = -1;
202-
self->blksize = 0;
203-
self->estimated_size = -1;
201+
self->stat_atopen = NULL;
204202
self->closefd = 1;
205203
self->weakreflist = NULL;
206204
}
@@ -256,7 +254,6 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
256254
#elif !defined(MS_WINDOWS)
257255
int *atomic_flag_works = NULL;
258256
#endif
259-
struct _Py_stat_struct fdfstat;
260257
int fstat_result;
261258
int async_err = 0;
262259

@@ -454,9 +451,13 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
454451
#endif
455452
}
456453

457-
self->blksize = DEFAULT_BUFFER_SIZE;
454+
self->stat_atopen = PyMem_New(struct _Py_stat_struct, 1);
455+
if (self->stat_atopen == NULL) {
456+
PyErr_Format(PyExc_MemoryError, "Unable to allocate space for stat result");
457+
goto error;
458+
}
458459
Py_BEGIN_ALLOW_THREADS
459-
fstat_result = _Py_fstat_noraise(self->fd, &fdfstat);
460+
fstat_result = _Py_fstat_noraise(self->fd, self->stat_atopen);
460461
Py_END_ALLOW_THREADS
461462
if (fstat_result < 0) {
462463
/* Tolerate fstat() errors other than EBADF. See Issue #25717, where
@@ -471,25 +472,21 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
471472
#endif
472473
goto error;
473474
}
475+
476+
PyMem_Free(self->stat_atopen);
477+
self->stat_atopen = NULL;
474478
}
475479
else {
476480
#if defined(S_ISDIR) && defined(EISDIR)
477481
/* On Unix, open will succeed for directories.
478482
In Python, there should be no file objects referring to
479483
directories, so we need a check. */
480-
if (S_ISDIR(fdfstat.st_mode)) {
484+
if (S_ISDIR(self->stat_atopen->st_mode)) {
481485
errno = EISDIR;
482486
PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, nameobj);
483487
goto error;
484488
}
485489
#endif /* defined(S_ISDIR) */
486-
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
487-
if (fdfstat.st_blksize > 1)
488-
self->blksize = fdfstat.st_blksize;
489-
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
490-
if (fdfstat.st_size < PY_SSIZE_T_MAX) {
491-
self->estimated_size = (Py_off_t)fdfstat.st_size;
492-
}
493490
}
494491

495492
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
@@ -521,6 +518,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
521518
internal_close(self);
522519
_PyErr_ChainExceptions1(exc);
523520
}
521+
if (self->stat_atopen != NULL) {
522+
PyMem_Free(self->stat_atopen);
523+
self->stat_atopen = NULL;
524+
}
524525

525526
done:
526527
#ifdef MS_WINDOWS
@@ -553,6 +554,10 @@ fileio_dealloc(fileio *self)
553554
if (_PyIOBase_finalize((PyObject *) self) < 0)
554555
return;
555556
_PyObject_GC_UNTRACK(self);
557+
if (self->stat_atopen != NULL) {
558+
PyMem_Free(self->stat_atopen);
559+
self->stat_atopen = NULL;
560+
}
556561
if (self->weakreflist != NULL)
557562
PyObject_ClearWeakRefs((PyObject *) self);
558563
(void)fileio_clear(self);
@@ -725,7 +730,12 @@ _io_FileIO_readall_impl(fileio *self)
725730
return err_closed();
726731
}
727732

728-
end = self->estimated_size;
733+
if (self->stat_atopen) {
734+
end = (Py_off_t)Py_MIN(self->stat_atopen->st_size, PY_SSIZE_T_MAX);
735+
} else {
736+
end = -1;
737+
}
738+
729739
if (end <= 0) {
730740
/* Use a default size and resize as needed. */
731741
bufsize = SMALLCHUNK;
@@ -1098,7 +1108,10 @@ _io_FileIO_truncate_impl(fileio *self, PyTypeObject *cls, PyObject *posobj)
10981108
estimate, that it is much larger than the actual size can result in a
10991109
significant over allocation and sometimes a MemoryError / running out of
11001110
memory. */
1101-
self->estimated_size = pos;
1111+
if (self->stat_atopen) {
1112+
PyMem_Free(self->stat_atopen);
1113+
self->stat_atopen = NULL;
1114+
}
11021115

11031116
return posobj;
11041117
}
@@ -1179,6 +1192,11 @@ _io_FileIO_isatty_impl(fileio *self)
11791192

11801193
if (self->fd < 0)
11811194
return err_closed();
1195+
1196+
if (self->stat_atopen && S_ISREG(self->stat_atopen->st_mode)) {
1197+
return Py_False;
1198+
}
1199+
11821200
Py_BEGIN_ALLOW_THREADS
11831201
_Py_BEGIN_SUPPRESS_IPH
11841202
res = isatty(self->fd);
@@ -1229,16 +1247,28 @@ get_mode(fileio *self, void *closure)
12291247
return PyUnicode_FromString(mode_string(self));
12301248
}
12311249

1250+
static PyObject *
1251+
get_blksize(fileio *self, void *closure)
1252+
{
1253+
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1254+
if (self->stat_atopen && self->stat_atopen->st_blksize > 1) {
1255+
return PyLong_FromLong(self->stat_atopen->st_blksize);
1256+
}
1257+
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
1258+
return PyLong_FromLong(DEFAULT_BUFFER_SIZE);
1259+
}
1260+
1261+
12321262
static PyGetSetDef fileio_getsetlist[] = {
12331263
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
12341264
{"closefd", (getter)get_closefd, NULL,
12351265
"True if the file descriptor will be closed by close()."},
12361266
{"mode", (getter)get_mode, NULL, "String giving the file mode"},
1267+
{"_blksize", (getter)get_blksize, NULL, "blksize to use for io"},
12371268
{NULL},
12381269
};
12391270

12401271
static PyMemberDef fileio_members[] = {
1241-
{"_blksize", Py_T_UINT, offsetof(fileio, blksize), 0},
12421272
{"_finalizing", Py_T_BOOL, offsetof(fileio, finalizing), 0},
12431273
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(fileio, weakreflist), Py_READONLY},
12441274
{"__dictoffset__", Py_T_PYSSIZET, offsetof(fileio, dict), Py_READONLY},

0 commit comments

Comments
 (0)