Skip to content

Commit a3d5aab

Browse files
cmaloney and vstinner authored
gh-129005: Align FileIO.readall between _pyio and _io (#129705)
Utilize `bytearray.resize()` and `os.readinto()` to reduce copies and match the behavior of `_io.FileIO.readall()`. There is still one extra copy, which means twice the memory is required compared to `_io.FileIO`, because there is currently no zero-copy path from `bytearray` to `bytes`. On my system, reading a 2 GB file with `./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read -v` goes from ~2.7 seconds to ~2.2 seconds. Co-authored-by: Victor Stinner <[email protected]>
1 parent ae132ed commit a3d5aab

File tree

2 files changed

+25
-14
lines changed

2 files changed

+25
-14
lines changed

Lib/_pyio.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,17 @@ def write(self, b):
14541454
return BufferedWriter.write(self, b)
14551455

14561456

1457+
def _new_buffersize(bytes_read):
    """Return the size a buffer should grow to once *bytes_read* bytes are filled.

    Parallels ``new_buffersize`` in ``_io/fileio.c``.  The increment is
    ``bytes_read >> 3`` (one eighth) when more than 65536 bytes have been
    read and ``256 + bytes_read`` otherwise; in both cases it is raised to
    at least ``DEFAULT_BUFFER_SIZE``.
    """
    # Pick the proportional increment for the current fill level.
    growth = (bytes_read >> 3) if bytes_read > 65536 else (256 + bytes_read)
    # Never grow by less than DEFAULT_BUFFER_SIZE.
    return bytes_read + max(growth, DEFAULT_BUFFER_SIZE)
1466+
1467+
14571468
class FileIO(RawIOBase):
14581469
_fd = -1
14591470
_created = False
@@ -1672,22 +1683,20 @@ def readall(self):
16721683
except OSError:
16731684
pass
16741685

1675-
result = bytearray()
1676-
while True:
1677-
if len(result) >= bufsize:
1678-
bufsize = len(result)
1679-
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
1680-
n = bufsize - len(result)
1681-
try:
1682-
chunk = os.read(self._fd, n)
1683-
except BlockingIOError:
1684-
if result:
1685-
break
1686+
result = bytearray(bufsize)
1687+
bytes_read = 0
1688+
try:
1689+
while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
1690+
bytes_read += n
1691+
if bytes_read >= len(result):
1692+
result.resize(_new_buffersize(bytes_read))
1693+
except BlockingIOError:
1694+
if not bytes_read:
16861695
return None
1687-
if not chunk: # reached the end of the file
1688-
break
1689-
result += chunk
16901696

1697+
assert len(result) - bytes_read >= 1, \
1698+
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
1699+
result.resize(bytes_read)
16911700
return bytes(result)
16921701

16931702
def readinto(self, buffer):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer
2+
using the same algorithm ``_io.FileIO.readall()`` uses.

0 commit comments

Comments
 (0)