From 84e20ea4953ea25ba8d228d405fb9e7ac0ccf20b Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Sun, 16 Feb 2025 15:37:36 +0100
Subject: [PATCH 1/7] gh-101178: C implementation of base64._a85encode

Initially done to reduce the huge memory consumption of the previous
implementation for large inputs, since no memory-friendly pure-Python
approach was found that did not introduce a performance regression.

This implementation also greatly improves performance in all cases.

Signed-off-by: Romuald Brunet <romuald@chivil.com>
---
 Lib/base64.py               |  35 +++---------
 Modules/binascii.c          |  88 ++++++++++++++++++++++++++++
 Modules/clinic/binascii.c.h | 111 +++++++++++++++++++++++++++++++++++-
 3 files changed, 207 insertions(+), 27 deletions(-)

diff --git a/Lib/base64.py b/Lib/base64.py
index 5d78cc09f40cd3..1de57ef9cd97a3 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -298,27 +298,12 @@ def b16decode(s, casefold=False):
 
 def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     # Helper function for a85encode and b85encode
+    # chars2 is now unused
     if not isinstance(b, bytes_types):
         b = memoryview(b).tobytes()
 
-    padding = (-len(b)) % 4
-    if padding:
-        b = b + b'\0' * padding
-    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
-
-    chunks = [b'z' if foldnuls and not word else
-              b'y' if foldspaces and word == 0x20202020 else
-              (chars2[word // 614125] +
-               chars2[word // 85 % 7225] +
-               chars[word % 85])
-              for word in words]
-
-    if padding and not pad:
-        if chunks[-1] == b'z':
-            chunks[-1] = chars[0] * 5
-        chunks[-1] = chunks[-1][:-padding]
-
-    return b''.join(chunks)
+    return binascii.b2a_base85(b, chars=chars, pad=pad,
+                               foldnuls=foldnuls, foldspaces=foldspaces)
 
 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     """Encode bytes-like object b using Ascii85 and return a bytes object.
@@ -337,14 +322,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     adobe controls whether the encoded byte sequence is framed with <~ and ~>,
     which is used by the Adobe implementation.
     """
-    global _a85chars, _a85chars2
+    global _a85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _a85chars2 is None:
+    if _a85chars is None:
         _a85chars = [bytes((i,)) for i in range(33, 118)]
-        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
 
-    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
+    result = _85encode(b, b''.join(_a85chars), None, pad, True, foldspaces)
 
     if adobe:
         result = _A85START + result
@@ -445,13 +429,12 @@ def b85encode(b, pad=False):
     If pad is true, the input is padded with b'\\0' so its length is a multiple of
     4 bytes before encoding.
     """
-    global _b85chars, _b85chars2
+    global _b85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _b85chars2 is None:
+    if _b85chars is None:
         _b85chars = [bytes((i,)) for i in _b85alphabet]
-        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
-    return _85encode(b, _b85chars, _b85chars2, pad)
+    return _85encode(b, _b85alphabet, None, pad)
 
 def b85decode(b):
     """Decode the base85-encoded bytes-like object or ASCII string b
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 6bb01d148b6faa..8d80abdb9268a1 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1239,6 +1239,93 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
     return rv;
 }
 
+/*[clinic input]
+binascii.b2a_base85
+
+    data: Py_buffer
+    chars: Py_buffer
+    pad: bool = False
+    foldnuls: bool = False
+    foldspaces: bool = False
+
+Utility method used by the base64 module to encode a85/b85 data
+
+    data: bytes
+    chars: 85 bytes conversion table
+    pad: use NULL-paded input if necessary
+    foldnuls: replace NULL chunks by 'z'
+    foldspaces: replace space-only chucks by 'y'
+
+[clinic start generated code]*/
+
+static PyObject *
+binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
+                         int pad, int foldnuls, int foldspaces)
+/*[clinic end generated code: output=0a92b3c535580aa0 input=a2d8ae712ed5adba]*/
+{
+    if (chars->len != 85) {
+        PyErr_SetString(PyExc_ValueError,
+                        "chars must be exactly 85 bytes long");
+        return NULL;
+    }
+
+    _PyBytesWriter writer;
+    _PyBytesWriter_Init(&writer);
+
+    const size_t bin_len = data->len;
+
+    // Allocate up to maxium encoded length, adjusted at end
+    const size_t ascii_len = ((bin_len + 3) / 4) * 5;
+
+    unsigned char *ascii_data = _PyBytesWriter_Alloc(&writer, ascii_len);
+    if (ascii_data == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    const unsigned char *table = chars->buf;
+    const unsigned char *bin_data = data->buf;
+
+    size_t i, j;
+    for (i = 0; i < bin_len; i += 4) {
+        const size_t chunk_size = (bin_len - i >= 4) ? 4 : (bin_len - i);
+
+        // translate chunk to 32bit integer
+        uint32_t value = 0;
+        for (j = 0; j < chunk_size; j++) {
+            value = (value << 8) | bin_data[i + j];
+        }
+        value <<= (4 - chunk_size) * 8;
+
+        if (foldnuls && value == 0) {
+            *ascii_data++ = 'z';
+        } else if (foldspaces && value == 0x20202020) {
+            *ascii_data++ = 'y';
+        } else {
+            for (j = 0; j < 5 ; j++) {
+                ascii_data[4 - j] = table[value % 85];
+                value /= 85;
+            }
+            ascii_data += 5;
+        }
+    }
+
+    // In case `i` went over the input size, we may need to shorten the output
+    const size_t overflow = (i - bin_len);
+
+    if (overflow && !pad && foldnuls && ascii_data[-1] == 'z') {
+        ascii_data--;
+        memset(ascii_data, table[0], 5);
+        ascii_data += 5;
+    }
+
+    if (!pad) {
+        ascii_data -= overflow;
+    }
+
+    return _PyBytesWriter_Finish(&writer, ascii_data);
+}
+
 /* List of functions defined in the module */
 
 static struct PyMethodDef binascii_module_methods[] = {
@@ -1246,6 +1333,7 @@ static struct PyMethodDef binascii_module_methods[] = {
     BINASCII_B2A_UU_METHODDEF
     BINASCII_A2B_BASE64_METHODDEF
     BINASCII_B2A_BASE64_METHODDEF
+    BINASCII_B2A_BASE85_METHODDEF
     BINASCII_A2B_HEX_METHODDEF
     BINASCII_B2A_HEX_METHODDEF
     BINASCII_HEXLIFY_METHODDEF
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index f81f12c388f373..791db5864a8a4c 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -774,4 +774,113 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
 
     return return_value;
 }
-/*[clinic end generated code: output=9ed7fbeec13c6606 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(binascii_b2a_base85__doc__,
+"b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n"
+"           foldspaces=False)\n"
+"--\n"
+"\n"
+"Utility method used by the base64 module to encode a85/b85 data\n"
+"\n"
+"    data: bytes\n"
+"    chars: 85 bytes conversion table\n"
+"    pad: use NULL-paded input if necessary\n"
+"    foldnuls: replace NULL chunks by \'z\'\n"
+"    foldspaces: replace space-only chucks by \'y\'");
+
+#define BINASCII_B2A_BASE85_METHODDEF    \
+    {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__},
+
+static PyObject *
+binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
+                         int pad, int foldnuls, int foldspaces);
+
+static PyObject *
+binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 5
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(data), &_Py_ID(chars), &_Py_ID(pad), &_Py_ID(foldnuls), &_Py_ID(foldspaces), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"data", "chars", "pad", "foldnuls", "foldspaces", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "b2a_base85",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[5];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2;
+    Py_buffer data = {NULL, NULL};
+    Py_buffer chars = {NULL, NULL};
+    int pad = 0;
+    int foldnuls = 0;
+    int foldspaces = 0;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 2, /*maxpos*/ 5, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[1], &chars, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    if (args[2]) {
+        pad = PyObject_IsTrue(args[2]);
+        if (pad < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    if (args[3]) {
+        foldnuls = PyObject_IsTrue(args[3]);
+        if (foldnuls < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    foldspaces = PyObject_IsTrue(args[4]);
+    if (foldspaces < 0) {
+        goto exit;
+    }
+skip_optional_pos:
+    return_value = binascii_b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces);
+
+exit:
+    /* Cleanup for data */
+    if (data.obj) {
+       PyBuffer_Release(&data);
+    }
+    /* Cleanup for chars */
+    if (chars.obj) {
+       PyBuffer_Release(&chars);
+    }
+
+    return return_value;
+}
+/*[clinic end generated code: output=ae4488d2f300a0ff input=a9049054013a1b77]*/

From 74fc245760ddeff612f7866a4ced3afb77cafc13 Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Sun, 16 Feb 2025 15:38:26 +0100
Subject: [PATCH 2/7] Add possible regression test in test_base64

Regression was found while testing the new C implementation, when foldspaces
was used with b85encode (since a chunk could end in z without having been
folded)
---
 Lib/test/test_base64.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 409c8c109e885f..d06fd58d39b628 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -528,6 +528,7 @@ def test_b85encode(self):
                 b"""0123456789!@#0^&*();:<>,. []{}""":
                 b"""VPa!sWoBn+X=-b1ZEkOHadLBXb#`}nd3r%YLqtVJM@UIZOH55pPf$@("""
                 b"""Q&d$}S6EqEFflSSG&MFiI5{CeBQRbjDkv#CIy^osE+AW7dwl""",
+            b"paddu\xc7": b'aA9O*b;k',
             b'no padding..': b'Zf_uPVPs@!Zf7no',
             b'zero compression\x00\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG00000',
             b'zero compression\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG0000',

From 60a3ae64c885a04907841522de8436633e58db6d Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Sun, 16 Feb 2025 16:43:26 +0100
Subject: [PATCH 3/7] Review changes Modules/binascii.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply suggestions

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
---
 Modules/binascii.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Modules/binascii.c b/Modules/binascii.c
index 8d80abdb9268a1..7b0885a5d088ae 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1299,9 +1299,11 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
 
         if (foldnuls && value == 0) {
             *ascii_data++ = 'z';
-        } else if (foldspaces && value == 0x20202020) {
+        }
+        else if (foldspaces && value == 0x20202020) {
             *ascii_data++ = 'y';
-        } else {
+        }
+        else {
             for (j = 0; j < 5 ; j++) {
                 ascii_data[4 - j] = table[value % 85];
                 value /= 85;

From aaa09e16e3fd2d2e38ab63d041b957485113fbca Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Sun, 16 Feb 2025 20:33:57 +0100
Subject: [PATCH 4/7] Review fixes: update algorithm

Inspired by the git source: https://github.com/git/git/blob/03944513488db4a81fdb4c21c3b515e4cb260b05/base85.c#L79

This avoids checking the chunk size on every iteration and thus improves performance.
---
 Modules/binascii.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/Modules/binascii.c b/Modules/binascii.c
index 7b0885a5d088ae..97d75b9ef0414d 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1286,16 +1286,20 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
     const unsigned char *table = chars->buf;
     const unsigned char *bin_data = data->buf;
 
-    size_t i, j;
-    for (i = 0; i < bin_len; i += 4) {
-        const size_t chunk_size = (bin_len - i >= 4) ? 4 : (bin_len - i);
+    size_t i = 0 ;
+    int padding = 0;
 
-        // translate chunk to 32bit integer
+    while (i < bin_len) {
+        // translate each 4 byte chunk to 32bit integer
         uint32_t value = 0;
-        for (j = 0; j < chunk_size; j++) {
-            value = (value << 8) | bin_data[i + j];
+        for (int cnt = 24; cnt >= 0; cnt -= 8) {
+            value |= bin_data[i] << cnt;
+            if (++i == bin_len) {
+                // Number of bytes under the 4 bytes rounded value
+                padding = cnt / 8;
+                break;
+            }
         }
-        value <<= (4 - chunk_size) * 8;
 
         if (foldnuls && value == 0) {
             *ascii_data++ = 'z';
@@ -1304,7 +1308,7 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
             *ascii_data++ = 'y';
         }
         else {
-            for (j = 0; j < 5 ; j++) {
+            for (int j = 0; j < 5 ; j++) {
                 ascii_data[4 - j] = table[value % 85];
                 value /= 85;
             }
@@ -1312,17 +1316,14 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
         }
     }
 
-    // In case `i` went over the input size, we may need to shorten the output
-    const size_t overflow = (i - bin_len);
-
-    if (overflow && !pad && foldnuls && ascii_data[-1] == 'z') {
+    if (padding && !pad && foldnuls && ascii_data[-1] == 'z') {
         ascii_data--;
         memset(ascii_data, table[0], 5);
         ascii_data += 5;
     }
 
     if (!pad) {
-        ascii_data -= overflow;
+        ascii_data -= padding;
     }
 
     return _PyBytesWriter_Finish(&writer, ascii_data);

From cb46a5db57abd8b12d790f454e07dbf1d4a244c5 Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Mon, 17 Feb 2025 08:27:44 +0100
Subject: [PATCH 5/7] Further borrow from git's implementation

Since j is not unsigned anymore we can reverse the table lookup loop
---
 Modules/binascii.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Modules/binascii.c b/Modules/binascii.c
index 97d75b9ef0414d..ba1eb6ca61073c 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1308,8 +1308,8 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
             *ascii_data++ = 'y';
         }
         else {
-            for (int j = 0; j < 5 ; j++) {
-                ascii_data[4 - j] = table[value % 85];
+            for (int j = 4; j >= 0; j--) {
+                ascii_data[j] = table[value % 85];
                 value /= 85;
             }
             ascii_data += 5;

From c88450ba34e4ba9c4fbbe470a6c54a5a3bcf709d Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Mon, 17 Feb 2025 22:47:45 +0100
Subject: [PATCH 6/7] Rename b2a_base85 as private for now

---
 Lib/base64.py               |  4 ++--
 Modules/binascii.c          | 11 ++++++-----
 Modules/clinic/binascii.c.h | 23 ++++++++++++-----------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/Lib/base64.py b/Lib/base64.py
index 1de57ef9cd97a3..8ab27441ce482f 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -302,8 +302,8 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     if not isinstance(b, bytes_types):
         b = memoryview(b).tobytes()
 
-    return binascii.b2a_base85(b, chars=chars, pad=pad,
-                               foldnuls=foldnuls, foldspaces=foldspaces)
+    return binascii._b2a_base85(b, chars=chars, pad=pad,
+                                foldnuls=foldnuls, foldspaces=foldspaces)
 
 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     """Encode bytes-like object b using Ascii85 and return a bytes object.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index ba1eb6ca61073c..67a0954e1c1944 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1240,7 +1240,7 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
 }
 
 /*[clinic input]
-binascii.b2a_base85
+binascii._b2a_base85
 
     data: Py_buffer
     chars: Py_buffer
@@ -1259,9 +1259,10 @@ Utility method used by the base64 module to encode a85/b85 data
 [clinic start generated code]*/
 
 static PyObject *
-binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
-                         int pad, int foldnuls, int foldspaces)
-/*[clinic end generated code: output=0a92b3c535580aa0 input=a2d8ae712ed5adba]*/
+binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
+                          Py_buffer *chars, int pad, int foldnuls,
+                          int foldspaces)
+/*[clinic end generated code: output=cefe84c300ad7314 input=3c8faf77b992dcc2]*/
 {
     if (chars->len != 85) {
         PyErr_SetString(PyExc_ValueError,
@@ -1336,7 +1337,7 @@ static struct PyMethodDef binascii_module_methods[] = {
     BINASCII_B2A_UU_METHODDEF
     BINASCII_A2B_BASE64_METHODDEF
     BINASCII_B2A_BASE64_METHODDEF
-    BINASCII_B2A_BASE85_METHODDEF
+    BINASCII__B2A_BASE85_METHODDEF
     BINASCII_A2B_HEX_METHODDEF
     BINASCII_B2A_HEX_METHODDEF
     BINASCII_HEXLIFY_METHODDEF
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index 791db5864a8a4c..3e5a22b4ee4433 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -775,9 +775,9 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
     return return_value;
 }
 
-PyDoc_STRVAR(binascii_b2a_base85__doc__,
-"b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n"
-"           foldspaces=False)\n"
+PyDoc_STRVAR(binascii__b2a_base85__doc__,
+"_b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n"
+"            foldspaces=False)\n"
 "--\n"
 "\n"
 "Utility method used by the base64 module to encode a85/b85 data\n"
@@ -788,15 +788,16 @@ PyDoc_STRVAR(binascii_b2a_base85__doc__,
 "    foldnuls: replace NULL chunks by \'z\'\n"
 "    foldspaces: replace space-only chucks by \'y\'");
 
-#define BINASCII_B2A_BASE85_METHODDEF    \
-    {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__},
+#define BINASCII__B2A_BASE85_METHODDEF    \
+    {"_b2a_base85", _PyCFunction_CAST(binascii__b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii__b2a_base85__doc__},
 
 static PyObject *
-binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars,
-                         int pad, int foldnuls, int foldspaces);
+binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
+                          Py_buffer *chars, int pad, int foldnuls,
+                          int foldspaces);
 
 static PyObject *
-binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+binascii__b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
     #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
@@ -820,7 +821,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
     static const char * const _keywords[] = {"data", "chars", "pad", "foldnuls", "foldspaces", NULL};
     static _PyArg_Parser _parser = {
         .keywords = _keywords,
-        .fname = "b2a_base85",
+        .fname = "_b2a_base85",
         .kwtuple = KWTUPLE,
     };
     #undef KWTUPLE
@@ -869,7 +870,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
         goto exit;
     }
 skip_optional_pos:
-    return_value = binascii_b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces);
+    return_value = binascii__b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces);
 
 exit:
     /* Cleanup for data */
@@ -883,4 +884,4 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
 
     return return_value;
 }
-/*[clinic end generated code: output=ae4488d2f300a0ff input=a9049054013a1b77]*/
+/*[clinic end generated code: output=a1f5ae9968e8e52d input=a9049054013a1b77]*/

From 2fc892cd05efbbaff2e0c4a171cdda155e26720a Mon Sep 17 00:00:00 2001
From: Romuald Brunet <romuald@chivil.com>
Date: Mon, 17 Feb 2025 22:54:45 +0100
Subject: [PATCH 7/7] Credit to git's implementation and more comments

---
 Modules/binascii.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Modules/binascii.c b/Modules/binascii.c
index 67a0954e1c1944..bd67a656d2b8a2 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1290,8 +1290,9 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
     size_t i = 0 ;
     int padding = 0;
 
+    // Conversion largely inspired from git base85 implementation
     while (i < bin_len) {
-        // translate each 4 byte chunk to 32bit integer
+        // Translate each 4 byte chunk to 32bit integer
         uint32_t value = 0;
         for (int cnt = 24; cnt >= 0; cnt -= 8) {
             value |= bin_data[i] << cnt;
@@ -1302,6 +1303,7 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
             }
         }
 
+        // Handle NULL only and space-only cases (specific to ASCII85)
         if (foldnuls && value == 0) {
             *ascii_data++ = 'z';
         }
@@ -1317,6 +1319,7 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
         }
     }
 
+    // Expand the last folded null in case it did not fill a full chunk
     if (padding && !pad && foldnuls && ascii_data[-1] == 'z') {
         ascii_data--;
         memset(ascii_data, table[0], 5);