Skip to content

Commit 4a300d7

Browse files
committed
gh-101178: refactor base64.b85encode to be memory friendly
Refactor code to make use of generators instead of allocating 2 potentially huge lists for large datasets
1 parent 77d9f1e commit 4a300d7

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

Lib/base64.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -307,21 +307,35 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
307307
padding = (-len(b)) % 4
308308
if padding:
309309
b = b + b'\0' * padding
310-
words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
311310

312-
chunks = [b'z' if foldnuls and not word else
311+
unpack = struct.Struct("!I").unpack
312+
ibytes = (b[i:i+4] for i in range(0, len(b), 4)) # 4 bytes each
313+
words = (unpack(i)[0] for i in ibytes)
314+
315+
chunks = (b'z' if foldnuls and not word else
313316
b'y' if foldspaces and word == 0x20202020 else
314317
(chars2[word // 614125] +
315318
chars2[word // 85 % 7225] +
316319
chars[word % 85])
317-
for word in words]
320+
for word in words)
321+
322+
last = None
323+
ret = bytearray()
324+
for chunk in chunks:
325+
last = chunk
326+
ret.extend(chunk)
327+
328+
if last and padding and not pad:
329+
ret[-len(last):] = []
330+
331+
if last == b'z':
332+
last = chars[0] * 5
333+
last = last[:-padding]
334+
335+
ret.extend(last)
318336

319-
if padding and not pad:
320-
if chunks[-1] == b'z':
321-
chunks[-1] = chars[0] * 5
322-
chunks[-1] = chunks[-1][:-padding]
337+
return bytes(ret)
323338

324-
return b''.join(chunks)
325339

326340
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
327341
"""Encode bytes-like object b using Ascii85 and return a bytes object.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Refactored :func:`base64.b85encode` to drastically reduce its memory
2+
footprint when used with large input data.

0 commit comments

Comments
 (0)