python · romuald · Feb 16, 2025 · Feb 16, 2025 · Feb 16, 2025 · Feb 16, 2025
diff --git a/Lib/base64.py b/Lib/base64.py
@@ -298,27 +298,12 @@ def b16decode(s, casefold=False):
 
 def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     # Helper function for a85encode and b85encode
+    # chars2 is ignored: the encoding is delegated to binascii._b2a_base85
     if not isinstance(b, bytes_types):
         b = memoryview(b).tobytes()
 
-    padding = (-len(b)) % 4
-    if padding:
-        b = b + b'\0' * padding
-    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
-
-    chunks = [b'z' if foldnuls and not word else
-              b'y' if foldspaces and word == 0x20202020 else
-              (chars2[word // 614125] +
-               chars2[word // 85 % 7225] +
-               chars[word % 85])
-              for word in words]
-
-    if padding and not pad:
-        if chunks[-1] == b'z':
-            chunks[-1] = chars[0] * 5
-        chunks[-1] = chunks[-1][:-padding]
-
-    return b''.join(chunks)
+    return binascii._b2a_base85(b, chars=chars, pad=pad,
+                                foldnuls=foldnuls, foldspaces=foldspaces)
 
 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     """Encode bytes-like object b using Ascii85 and return a bytes object.
@@ -337,14 +322,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     adobe controls whether the encoded byte sequence is framed with <~ and ~>,
     which is used by the Adobe implementation.
     """
-    global _a85chars, _a85chars2
+    global _a85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _a85chars2 is None:
+    if _a85chars is None:
         _a85chars = [bytes((i,)) for i in range(33, 118)]
-        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
 
-    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
+    result = _85encode(b, b''.join(_a85chars), None, pad, True, foldspaces)
 
     if adobe:
         result = _A85START + result
@@ -445,13 +429,12 @@ def b85encode(b, pad=False):
     If pad is true, the input is padded with b'\\0' so its length is a multiple of
     4 bytes before encoding.
     """
-    global _b85chars, _b85chars2
+    global _b85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _b85chars2 is None:
+    if _b85chars is None:
         _b85chars = [bytes((i,)) for i in _b85alphabet]
-        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
-    return _85encode(b, _b85chars, _b85chars2, pad)
+    return _85encode(b, _b85alphabet, None, pad)
 
 def b85decode(b):
     """Decode the base85-encoded bytes-like object or ASCII string b

diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
@@ -528,6 +528,7 @@ def test_b85encode(self):
                 b"""0123456789!@#0^&*();:<>,. []{}""":
                 b"""VPa!sWoBn+X=-b1ZEkOHadLBXb#`}nd3r%YLqtVJM@UIZOH55pPf$@("""
                 b"""Q&d$}S6EqEFflSSG&MFiI5{CeBQRbjDkv#CIy^osE+AW7dwl""",
+            b"paddu\xc7": b'aA9O*b;k',
             b'no padding..': b'Zf_uPVPs@!Zf7no',
             b'zero compression\x00\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG00000',
             b'zero compression\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG0000',

diff --git a/Modules/binascii.c b/Modules/binascii.c
@@ -1239,13 +1239,108 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
     return rv;
 }
 
+/*[clinic input]
+binascii._b2a_base85
+
+    data: Py_buffer
+    chars: Py_buffer
+    pad: bool = False
+    foldnuls: bool = False
+    foldspaces: bool = False
+
+Utility method used by the base64 module to encode a85/b85 data
+
+    data: bytes
+    chars: 85 bytes conversion table
+    pad: use NULL-paded input if necessary
+    foldnuls: replace NULL chunks by 'z'
+    foldspaces: replace space-only chucks by 'y'
+
+[clinic start generated code]*/
+
+static PyObject *
+binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
+                          Py_buffer *chars, int pad, int foldnuls,
+                          int foldspaces)
+/*[clinic end generated code: output=cefe84c300ad7314 input=3c8faf77b992dcc2]*/
+{
+    // chars is the alphabet: table[k] below is the byte emitted for digit k
+    if (chars->len != 85) {
+        PyErr_SetString(PyExc_ValueError,
+                        "chars must be exactly 85 bytes long");
+        return NULL;
+    }
+
+    _PyBytesWriter writer;
+    _PyBytesWriter_Init(&writer);
+
+    const size_t bin_len = data->len;
+    // Allocate up to maximum encoded length, adjusted at end
+    const size_t ascii_len = ((bin_len + 3) / 4) * 5;
+    unsigned char *ascii_data = _PyBytesWriter_Alloc(&writer, ascii_len);
+    if (ascii_data == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    const unsigned char *table = chars->buf;
+    const unsigned char *bin_data = data->buf;
+
+    size_t i = 0;
+    int padding = 0;
+
+    // Conversion largely inspired from git base85 implementation
+    while (i < bin_len) {
+        // Pack up to 4 input bytes, most significant first, into 32 bits
+        uint32_t value = 0;
+        for (int cnt = 24; cnt >= 0; cnt -= 8) {
+            // Cast first: shifting an int-promoted byte >= 0x80 by 24 is UB
+            value |= (uint32_t)bin_data[i] << cnt;
+            if (++i == bin_len) {
+                // Input ended here: cnt / 8 bytes of this chunk are missing
+                padding = cnt / 8;
+                break;
+            }
+        }
+
+        // Handle NULL only and space-only cases (specific to ASCII85)
+        if (foldnuls && value == 0) {
+            *ascii_data++ = 'z';
+        }
+        else if (foldspaces && value == 0x20202020) {
+            *ascii_data++ = 'y';
+        }
+        else {
+            // Emit the 5 base-85 digits, least significant digit last
+            for (int j = 4; j >= 0; j--) {
+                ascii_data[j] = table[value % 85];
+                value /= 85;
+            }
+            ascii_data += 5;
+        }
+    }
+
+    // Expand the last folded null in case it did not fill a full chunk
+    if (padding && !pad && foldnuls && ascii_data[-1] == 'z') {
+        ascii_data--;
+        memset(ascii_data, table[0], 5);
+        ascii_data += 5;
+    }
+
+    if (!pad) {
+        ascii_data -= padding;
+    }
+
+    return _PyBytesWriter_Finish(&writer, ascii_data);
+}
+
 /* List of functions defined in the module */
 
 static struct PyMethodDef binascii_module_methods[] = {
     BINASCII_A2B_UU_METHODDEF
     BINASCII_B2A_UU_METHODDEF
     BINASCII_A2B_BASE64_METHODDEF
     BINASCII_B2A_BASE64_METHODDEF
+    BINASCII__B2A_BASE85_METHODDEF
     BINASCII_A2B_HEX_METHODDEF
     BINASCII_B2A_HEX_METHODDEF
     BINASCII_HEXLIFY_METHODDEF

diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h